def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    '''Build the AO two-electron integrals on the uniform FFT grid.

    Args:
        mydf : density-fitting object; ``cell``, ``mesh``, ``max_memory`` and
            ``_numint`` are read from it.
        kpts : k-points of the four AO indices, normalised by
            ``_format_kpts``.
        compact : only consulted when all four k-points are the gamma point.
            If false, the packed result is expanded with ``ao2mo.restore``
            and reshaped to (nao**2, nao**2).

    Returns:
        A zero array of shape (nao,nao,nao,nao) when ``_iskconserv`` rejects
        the k-points; otherwise the result of ``_contract_compact`` (gamma
        point) or ``_contract_plain`` (every other case).
    '''
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'fft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    coulG = tools.get_coulG(cell, q, mesh=mydf.mesh)
    coords = cell.gen_uniform_grids(mydf.mesh)
    max_memory = mydf.max_memory - lib.current_memory()[0]

    def as_rows(ao_on_grid):
        # AO values transposed and stored C-contiguously.
        return numpy.asarray(ao_on_grid.T, order='C')

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        #:ao_pairs_G = get_ao_pairs_G(mydf, kptijkl[:2], q, compact=compact)
        #:ao_pairs_G *= numpy.sqrt(coulG).reshape(-1,1)
        #:eri = lib.dot(ao_pairs_G.T, ao_pairs_G, cell.vol/ngrids**2)
        ao_rows = as_rows(mydf._numint.eval_ao(cell, coords, kpti)[0])
        packed = _contract_compact(mydf, (ao_rows, ao_rows), coulG,
                                   max_memory=max_memory)
        if compact:
            return packed
        return ao2mo.restore(1, packed, nao).reshape(nao**2,nao**2)

    ####################
    # aosym = s1, complex integrals
    #
    #:ao_pairs_G = get_ao_pairs_G(mydf, kptijkl[:2], q, compact=False)
    #:# ao_pairs_invG = rho_kl(-(G+k_ij)) = conj(rho_lk(G+k_ij)).swap(r,s)
    #:#=get_ao_pairs_G(mydf, [kptl,kptk], q, compact=False).transpose(0,2,1).conj()
    #:ao_pairs_invG = get_ao_pairs_G(mydf, -kptijkl[2:], q, compact=False).conj()
    #:ao_pairs_G *= coulG.reshape(-1,1)
    #:eri = lib.dot(ao_pairs_G.T, ao_pairs_invG, cell.vol/ngrids**2)
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        # Only two distinct k-points are involved; evaluate each AO set once.
        if is_zero(kpti-kptj):
            first = second = as_rows(
                mydf._numint.eval_ao(cell, coords, kpti)[0])
        else:
            first, second = mydf._numint.eval_ao(cell, coords, kptijkl[:2])
            first = as_rows(first)
            second = as_rows(second)
        aos = (first, second, second, first)
    else:
        aos = [as_rows(x)
               for x in mydf._numint.eval_ao(cell, coords, kptijkl)]

    fac = numpy.exp(-1j * numpy.dot(coords, q))
    # Discount the memory held by the AO arrays (in MB).
    max_memory -= aos[0].nbytes*4*1e-6
    return _contract_plain(mydf, aos, coulG, fac, max_memory=max_memory)
def get_mo_pairs_G(mydf, mo_coeffs, kpts=numpy.zeros((2,3)), q=None,
                   compact=getattr(__config__, 'pbc_df_mo_pairs_compact', False)):
    '''Calculate forward (G|ij) FFT of all MO pairs.

    Args:
        mo_coeffs: length-2 list of (nao,nmo) ndarrays
            The two sets of MO coefficients to use in calculating the
            product |ij).
        kpts : the two k-points of the orbital pair; ``None`` is replaced by
            two zero vectors.
        q : momentum used in the phase factor when the two k-points differ;
            defaults to ``kpts[1] - kpts[0]``.
        compact : if true, at the gamma point with identical coefficient
            sets, only the lower-triangular pairs are produced.

    Returns:
        mo_pairs_G : (ngrids, nmoi*nmoj) ndarray
            The FFT of the real-space MO pairs.  In the compact gamma-point
            case the second dimension is nmoi*(nmoi+1)//2 instead.
    '''
    if kpts is None:
        kpts = numpy.zeros((2,3))
    cell = mydf.cell
    kpts = numpy.asarray(kpts)
    coords = cell.gen_uniform_grids(mydf.mesh)
    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    ngrids = len(coords)

    def to_mo(coeff, ao):
        # MO values on the grid, one row per orbital.
        return numpy.asarray(lib.dot(coeff.T, ao.T), order='C')

    def trans(aoi, aoj, fac=1):
        moi = to_mo(mo_coeffs[0], aoi)
        if aoi is aoj and iden_coeffs(mo_coeffs[0], mo_coeffs[1]):
            # Same AO values and same coefficients: share one MO array.
            moj = moi
        else:
            moj = to_mo(mo_coeffs[1], aoj)
        pairs = numpy.empty((nmoi,nmoj,ngrids), dtype=numpy.complex128)
        for row in range(nmoi):
            pairs[row] = tools.fft(fac * moi[row].conj() * moj, mydf.mesh)
        return pairs.reshape(-1,ngrids).T

    if gamma_point(kpts):  # gamma point, real
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        if not (compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1])):
            return trans(ao, ao)
        mo = to_mo(mo_coeffs[0], ao)
        npair = nmoi*(nmoi+1)//2
        packed = numpy.empty((npair,ngrids), dtype=numpy.complex128)
        offset = 0
        for row in range(nmoi):
            # Row `row` of the lower triangle holds row+1 pairs.
            packed[offset:offset+row+1] = tools.fft(
                mo[row].conj() * mo[:row+1], mydf.mesh)
            offset += row + 1
        return packed.T

    if is_zero(kpts[0]-kpts[1]):
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        return trans(ao, ao)

    if q is None:
        q = kpts[1] - kpts[0]
    aoi, aoj = mydf._numint.eval_ao(cell, coords, kpts)
    fac = numpy.exp(-1j * numpy.dot(coords, q))
    return trans(aoi, aoj, fac)
def pw_contract(istep, sh_range, j3cR, j3cI):
    '''Subtract the plane-wave term from one shell range of the 3-index
    integrals, transform with j2c and write the result into feri.

    This is a closure: aosym, cell, ngrids, Gblksize, Gv, b, gxyz, Gvbase,
    kpt, adapted_kptjs, adapted_ji_idx, buf, pqkRbuf, pqkIbuf, kLR, kLI,
    naux, fuse, j2c, j2ctag, j2c_negative and feri all come from the
    enclosing scope, which is not part of this block.

    Args:
        istep : index of the shell range; used in the dataset names.
        sh_range : (bstart, bend, ncol) triple for the shell range.
        j3cR, j3cI : per-k-point lists holding the real and imaginary parts.
            The rows from naux onwards are updated in place.
    '''
    bstart, bend, ncol = sh_range
    if aosym == 's2':
        # Triangular storage: the second shell index stops at bend.
        shls_slice = (bstart, bend, 0, bend)
    else:
        shls_slice = (bstart, bend, 0, cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        # FT of the AO pairs for grid points p0:p1, written into `buf`.
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    adapted_kptjs, out=buf)
        nG = p1 - p0
        for k, ji in enumerate(adapted_ji_idx):
            aoao = dat[k].reshape(nG,ncol)
            # Views onto the preallocated scratch buffers.
            pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # NOTE(review): lib.dot(a, b, alpha, c, beta) presumably
            # computes c = alpha*a.dot(b) + beta*c in place -- confirm
            # against the lib.dot implementation.
            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
            # The imaginary part is skipped when both is_zero(kpt) and
            # gamma_point(adapted_kptjs[k]) hold.
            if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

    for k, ji in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)
        if j2ctag == 'CD':
            # j2c is used as a lower-triangular factor here.
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            feri['j3c/%d/%d'%(ji,istep)] = v
        else:
            feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

        # low-dimension systems
        if j2c_negative is not None:
            feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)
def ft_loop(self, mesh=None, q=numpy.zeros(3), kpts=None, shls_slice=None,
            max_memory=4000, aosym='s1', intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for all kpti which satisfy
        2pi*N = (kpts - kpti - q)*a,  N = -1, 0, 1

    The plane-wave grid is processed in blocks.  Each iteration yields
    ``(dat, p0, p1)`` where ``dat`` is the output of
    ``ft_ao._ft_aopair_kpts`` for grid points ``p0:p1``.  The same buffer is
    handed to every call, so ``dat`` must be consumed before the next
    iteration.

    When ``kpts`` is None, ``self.kpts`` is used and ``q`` must be zero.
    '''
    cell = self.cell
    mesh = self.mesh if mesh is None else mesh
    if kpts is None:
        assert(is_zero(q))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    gxyz = lib.cartesian_prod([numpy.arange(len(axis)) for axis in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # Number of AO pairs per grid point for the requested shell slice.
    if aosym == 's2':
        assert(shls_slice[2] == 0)
        lo = ao_loc[shls_slice[0]]
        hi = ao_loc[shls_slice[1]]
        nij = hi*(hi+1)//2 - lo*(lo+1)//2
    else:
        nrow = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        ncol = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = nrow*ncol

    # Block size from the memory budget (16 bytes per complex number),
    # at least 16 before being capped by ngrids and 16384.
    budget = int(max_memory*.9e6/(nij*nkpts*16*comp))
    blksize = min(max(16, budget), ngrids, 16384)
    buf = numpy.empty(nkpts*nij*blksize*comp, dtype=numpy.complex128)

    for p0, p1 in self.prange(0, ngrids, blksize):
        yield (ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                     b, gxyz[p0:p1], Gvbase, q, kpts,
                                     intor, comp, out=buf),
               p0, p1)
def _ewald_exxdiv_for_G0(cell, kpts, dms, vk, kpts_band=None):
    '''Add the term ``madelung * S.dot(dm).dot(S)`` to ``vk`` in place.

    ``S`` is the overlap returned by ``cell.pbc_intor('int1e_ovlp', ...)``
    and ``madelung`` comes from ``tools.pbc.madelung``.

    * ``kpts`` is None or a single k-point of shape (3,): ``vk[i]`` is
      updated for every density matrix.  For a single k-point this happens
      only if ``kpts_band`` is None or equals ``kpts``.
    * several k-points, with ``kpts_band`` None or equal to ``kpts``:
      ``vk[i,k]`` is updated for every k.
    * otherwise: for every k-point, ``vk[i,kp]`` is updated for each index
      ``kp`` that ``member`` finds in ``kpts_band``.

    Nothing is returned.
    '''
    s = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts)
    madelung = tools.pbc.madelung(cell, kpts)

    def correction(ovlp, dm):
        return madelung * numpy.dot(numpy.dot(ovlp, dm), ovlp)

    if kpts is None or numpy.shape(kpts) == (3,):
        if (kpts is None or kpts_band is None
                or is_zero(kpts_band-kpts)):
            for i, dm in enumerate(dms):
                vk[i] += correction(s, dm)
        return

    if kpts_band is None or numpy.array_equal(kpts, kpts_band):
        for k in range(len(kpts)):
            for i, dm in enumerate(dms):
                vk[i,k] += correction(s[k], dm[k])
        return

    band = kpts_band.reshape(-1,3)
    for k, kpt in enumerate(kpts):
        for kp in member(kpt, band):
            for i, dm in enumerate(dms):
                vk[i,kp] += correction(s[k], dm[k])
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Process every k-point pair whose difference is the
    uniq_kptji_id-th unique k-point, writing datasets into feri.

    This is a closure: uniq_kpts, uniq_inverse, kptjs, log, auxcell,
    fused_cell, Gv, b, gxyz, Gvbase, mydf, mesh, nao, naux, cell, fuse,
    ngrids, fswap, nsegs and feri come from the enclosing scope, which is
    not part of this block.

    Args:
        uniq_kptji_id : index into uniq_kpts.
        cholesky_j2c : (j2c, j2c_negative, j2ctag) triple.  j2ctag == 'CD'
            selects the triangular solve; any other tag applies j2c with
            lib.dot.  j2c_negative may be None.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    log.debug1('kpt = %s', kpt)
    # All (i, j) pairs that map onto this unique k-point difference.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # FT of the shells auxcell.nbas..fused_cell.nbas of the fused cell,
    # scaled by the weighted Coulomb kernel.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1,1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None  # drop the reference to the complex array

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        if cell.dimension == 3:
            # vbar and ovlp are only defined (and only used) for 3D cells
            # with zero kpt.
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory-mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngrids, 16384)

    def load(aux_slice):
        # Read columns col0:col1 of every 'j3c-junk' segment from fswap and
        # return the real / imaginary parts as two per-k-point lists.
        col0, col1 = aux_slice
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                              for i in range(nsegs)])
            # vbar is the interaction between the background charge
            # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
            if is_zero(kpt) and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                # No imaginary part is kept in this case.
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
        return j3cR, j3cI

    # Scratch buffers reused for every grid block.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
    # Column offsets of each shell range: (col0, col1) pairs for load().
    cols = [sh_range[2] for sh_range in shranges]
    locs = numpy.append(0, numpy.cumsum(cols))
    tasks = zip(locs[:-1], locs[1:])
    for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
        bstart, bend, ncol = shranges[istep]
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                   istep+1, len(shranges), bstart, bend, ncol)
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao.ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                       b, gxyz[p0:p1], Gvbase, kpt,
                                       adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # NOTE(review): lib.dot(a, b, alpha, c, beta) presumably
                # accumulates alpha*a.dot(b) into c -- confirm against
                # the lib.dot implementation.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d'%(ji,istep)] = v
            else:
                feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)
        j3cR = j3cI = None

    # The intermediate datasets of these pairs are no longer needed.
    for ji in adapted_ji_idx:
        del(fswap['j3c-junk/%d'%ji])
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Generate the 3-index integral datasets and store them in cderi_file.

    Steps, as performed below:
      1. ``outcore._aux_e2`` writes the 'int3c2e' integrals to a temporary
         HDF5 swap file under the name 'j3c-junk'.
      2. For every unique k-point difference, the 2-index 'int2c2e' metric
         minus its plane-wave part is stored in the swap file as 'j2c/<k>'.
      3. For every unique k-point difference, ``make_kpt`` subtracts the
         plane-wave part from the 3-index integrals, applies the
         eigen-decomposed metric and writes 'j3c/<ji>/<istep>' datasets to
         cderi_file.

    Args:
        mydf : density-fitting object (``stdout``, ``verbose``,
            ``max_memory``, ``mesh``, ``linear_dep_threshold``,
            ``weighted_coulG`` and ``auxbar`` are used).
        cell : the cell holding the AO basis.
        auxcell : the cell holding the auxiliary basis.
        kptij_lst : array indexed as ``kptij_lst[:,0]`` / ``kptij_lst[:,1]``
            for the (kpti, kptj) pairs; stored as 'j3c-kptij'.
        cderi_file : path of the HDF5 output file.  It is opened in append
            mode (created if missing) and always closed on exit.
    '''
    # time.clock was removed in Python 3.8; it is only used on interpreters
    # that do not provide time.process_time.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # Create swap file to avoid huge cderi_file. see also function
    # pyscf.pbc.df.df._make_j3c
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(kLR.T, kLR)
            j2c_k -= lib.dot(kLI.T, kLI)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c_k -= j2cR + j2cI * 1j
        fswap['j2c/%d'%k] = j2c_k
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None
    j2c = None

    # The mode is given explicitly: h5py >= 3.0 opens files read-only by
    # default, which would make every assignment below fail.  The context
    # manager guarantees the file is closed even if an error is raised.
    with h5py.File(cderi_file, 'a') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
            kpt = uniq_kpts[uniq_kptji_id]
            log.debug1('kpt = %s', kpt)
            # All (i, j) pairs that map onto this unique difference.
            adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
            adapted_kptjs = kptjs[adapted_ji_idx]
            nkptj = len(adapted_kptjs)
            log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

            Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
            Gaux = fuse(Gaux)
            Gaux *= mydf.weighted_coulG(kpt, False, mesh)
            kLR = Gaux.T.real.copy('C')
            kLI = Gaux.T.imag.copy('C')
            j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
            # Note large difference may be found in results between the
            # CD/eig treatments.  In some systems, small integral errors can
            # lead to different treatments of linear dependency which can be
            # observed in the total energy/orbital energy around 4th decimal
            # place.  The Cholesky (CD) treatment is therefore not attempted
            # here; the eigen-decomposition is always used.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                      uniq_kptji_id, w[-1]/w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            # Keep eigenvectors above the threshold, scaled by 1/sqrt(w).
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'

            if is_zero(kpt):  # kpti == kptj
                aosym = 's2'
                nao_pair = nao*(nao+1)//2

                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1,
                                      kpts=adapted_kptjs)
                for k, ji in enumerate(adapted_ji_idx):
                    ovlp[k] = lib.pack_tril(ovlp[k])
            else:
                aosym = 's1'
                nao_pair = nao**2

            mem_now = lib.current_memory()[0]
            log.debug2('memory = %s', mem_now)
            max_memory = max(2000, mydf.max_memory-mem_now)
            # nkptj for 3c-coulomb arrays plus 1 Lpq array
            buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1),
                         nao_pair)
            shranges = _guess_shell_ranges(cell, buflen, aosym)
            buflen = max([x[2] for x in shranges])
            # +1 for a pqkbuf
            if aosym == 's2':
                Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
            else:
                Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
            Gblksize = min(Gblksize, ngrids, 16384)
            # Scratch buffers reused for every grid block.
            pqkRbuf = numpy.empty(buflen*Gblksize)
            pqkIbuf = numpy.empty(buflen*Gblksize)
            # buf for ft_aopair
            buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128)

            def pw_contract(istep, sh_range, j3cR, j3cI):
                # Subtract the plane-wave part for one shell range, apply
                # j2c and store the result in feri.
                bstart, bend, ncol = sh_range
                if aosym == 's2':
                    shls_slice = (bstart, bend, 0, bend)
                else:
                    shls_slice = (bstart, bend, 0, cell.nbas)

                for p0, p1 in lib.prange(0, ngrids, Gblksize):
                    dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice,
                                                aosym, b, gxyz[p0:p1],
                                                Gvbase, kpt,
                                                adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = dat[k].reshape(nG,ncol)
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T

                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt) and
                                gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)

                for k, ji in enumerate(adapted_ji_idx):
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        v = j3cR[k]
                    else:
                        v = j3cR[k] + j3cI[k] * 1j
                    if j2ctag == 'CD':
                        v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                          overwrite_b=True)
                    else:
                        v = lib.dot(j2c, v)
                    feri['j3c/%d/%d'%(ji,istep)] = v

            # pw_contract is executed through lib.call_in_background while
            # this loop reads the next shell range from the swap file.
            with lib.call_in_background(pw_contract) as compute:
                col1 = 0
                for istep, sh_range in enumerate(shranges):
                    log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                               istep+1, len(shranges), *sh_range)
                    bstart, bend, ncol = sh_range
                    col0, col1 = col1, col1+ncol
                    j3cR = []
                    j3cI = []
                    for k, idx in enumerate(adapted_ji_idx):
                        v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                             for i in range(nsegs)]
                        v = fuse(numpy.vstack(v))
                        if is_zero(kpt) and cell.dimension == 3:
                            for i, c in enumerate(vbar):
                                if c != 0:
                                    v[i] -= c * ovlp[k][col0:col1]
                        j3cR.append(numpy.asarray(v.real, order='C'))
                        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                            j3cI.append(None)
                        else:
                            j3cI.append(numpy.asarray(v.imag, order='C'))
                        v = None
                    compute(istep, sh_range, j3cR, j3cI)
            # The intermediate datasets of these pairs are no longer needed.
            for ji in adapted_ji_idx:
                del(fswap['j3c-junk/%d'%ji])

        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    '''Build the AO two-electron integrals on the uniform FFT grid.

    The Coulomb kernel is obtained from ``tools.get_coulG`` using the
    cell's ``low_dim_ft_type``.

    Args:
        mydf : density-fitting object; ``cell``, ``mesh``, ``max_memory`` and
            ``_numint`` are read from it.
        kpts : k-points of the four AO indices, normalised by
            ``_format_kpts``.
        compact : only consulted when all four k-points are the gamma point.
            If false, the packed result is expanded with ``ao2mo.restore``
            and reshaped to (nao**2, nao**2).

    Returns:
        A zero array of shape (nao,nao,nao,nao) when ``_iskconserv`` rejects
        the k-points; otherwise the result of ``_contract_compact`` (gamma
        point) or ``_contract_plain`` (every other case).
    '''
    cell = mydf.cell
    nao = cell.nao_nr()
    low_dim_ft_type = cell.low_dim_ft_type
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'fft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    coulG = tools.get_coulG(cell, q, mesh=mydf.mesh,
                            low_dim_ft_type=low_dim_ft_type)
    coords = cell.gen_uniform_grids(mydf.mesh)
    max_memory = mydf.max_memory - lib.current_memory()[0]
    eval_ao = mydf._numint.eval_ao

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        #:ao_pairs_G = get_ao_pairs_G(mydf, kptijkl[:2], q, compact=compact)
        #:ao_pairs_G *= numpy.sqrt(coulG).reshape(-1,1)
        #:eri = lib.dot(ao_pairs_G.T, ao_pairs_G, cell.vol/ngrids**2)
        ao = numpy.asarray(eval_ao(cell, coords, kpti)[0].T, order='C')
        eri = _contract_compact(mydf, (ao,ao), coulG, max_memory=max_memory)
        if compact:
            return eri
        return ao2mo.restore(1, eri, nao).reshape(nao**2,nao**2)

    ####################
    # aosym = s1, complex integrals
    #
    #:ao_pairs_G = get_ao_pairs_G(mydf, kptijkl[:2], q, compact=False)
    #:# ao_pairs_invG = rho_kl(-(G+k_ij)) = conj(rho_lk(G+k_ij)).swap(r,s)
    #:#=get_ao_pairs_G(mydf, [kptl,kptk], q, compact=False).transpose(0,2,1).conj()
    #:ao_pairs_invG = get_ao_pairs_G(mydf, -kptijkl[2:], q, compact=False).conj()
    #:ao_pairs_G *= coulG.reshape(-1,1)
    #:eri = lib.dot(ao_pairs_G.T, ao_pairs_invG, cell.vol/ngrids**2)
    if not (is_zero(kpti-kptl) and is_zero(kptj-kptk)):
        aos = [numpy.asarray(ao.T, order='C')
               for ao in eval_ao(cell, coords, kptijkl)]
    elif is_zero(kpti-kptj):
        # All four k-points coincide: one AO evaluation serves every slot.
        aoi = aoj = numpy.asarray(eval_ao(cell, coords, kpti)[0].T,
                                  order='C')
        aos = (aoi, aoj, aoj, aoi)
    else:
        aoi, aoj = eval_ao(cell, coords, kptijkl[:2])
        aoi = numpy.asarray(aoi.T, order='C')
        aoj = numpy.asarray(aoj.T, order='C')
        aos = (aoi, aoj, aoj, aoi)

    fac = numpy.exp(-1j * numpy.dot(coords, q))
    # Discount the memory held by the AO arrays (in MB).
    max_memory = max_memory - aos[0].nbytes*4*1e-6
    return _contract_plain(mydf, aos, coulG, fac, max_memory=max_memory)
def get_ao_pairs_G(mydf, kpts=numpy.zeros((2,3)), q=None, shls_slice=None,
                   compact=getattr(__config__, 'pbc_df_ao_pairs_compact', False)):
    '''Calculate forward (G|ij) FFT of all AO pairs.

    Args:
        kpts : the two k-points of the AO pair; ``None`` is replaced by two
            zero vectors.
        q : momentum used in the phase factor when the two k-points differ;
            defaults to ``kpts[1] - kpts[0]``.
        shls_slice : optional (ish0, ish1, jsh0, jsh1) shell ranges, mapped
            to AO ranges with ``cell.ao_loc_nr()``.  All AOs are used when
            it is None.
        compact : if true and the k-points are the gamma point, only the
            lower-triangular pairs are computed.

    Returns:
        ao_pairs_G : 2D complex array
            For gamma point, the shape is (ngrids, nao*(nao+1)/2); otherwise the
            shape is (ngrids, nao*nao)
    '''
    if kpts is None:
        kpts = numpy.zeros((2,3))
    cell = mydf.cell
    kpts = numpy.asarray(kpts)
    coords = cell.gen_uniform_grids(mydf.mesh)
    ngrids = len(coords)

    # AO index ranges [i0:i1] and [j0:j1] of the two pair indices.
    if shls_slice is None:
        i0 = j0 = 0
        i1 = j1 = cell.nao_nr()
    else:
        ish0, ish1, jsh0, jsh1 = shls_slice
        ao_loc = cell.ao_loc_nr()
        i0, i1 = ao_loc[ish0], ao_loc[ish1]
        j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

    def trans(aoi, aoj, fac=1):
        same = aoi is aoj
        aoi = numpy.asarray(aoi.T, order='C')
        # Reuse the transposed array when both arguments are one object.
        aoj = aoi if same else numpy.asarray(aoj.T, order='C')
        ni = aoi.shape[0]
        nj = aoj.shape[0]
        pairs = numpy.empty((ni,nj,ngrids), dtype=numpy.complex128)
        for row in range(ni):
            pairs[row] = tools.fft(fac * aoi[row].conj() * aoj, mydf.mesh)
        return pairs.reshape(-1,ngrids).T

    if compact and gamma_point(kpts):  # gamma point
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        ao = numpy.asarray(ao.T, order='C')
        npair = i1*(i1+1)//2 - i0*(i0+1)//2
        packed = numpy.empty((npair,ngrids), dtype=numpy.complex128)
        offset = 0
        for row in range(i0, i1):
            # Row `row` of the lower triangle holds row+1 pairs.
            packed[offset:offset+row+1] = tools.fft(ao[row] * ao[:row+1],
                                                    mydf.mesh)
            offset += row + 1
        return packed.T

    if is_zero(kpts[0]-kpts[1]):
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        return trans(ao[:,i0:i1], ao[:,j0:j1])

    if q is None:
        q = kpts[1] - kpts[0]
    aoi, aoj = mydf._numint.eval_ao(cell, coords, kpts[:2])
    fac = numpy.exp(-1j * numpy.dot(coords, q))
    return trans(aoi[:,i0:i1], aoj[:,j0:j1], fac)
def sr_loop(self, kpti_kptj=numpy.zeros((2,3)), max_memory=2000,
            compact=True, blksize=None):
    '''Short range part

    Generator over blocks of the 3-index tensor stored under 'j3c' in
    ``self._cderi``.  Each iteration yields ``(LpqR, LpqI, sign)``: the real
    and imaginary parts of one block and ``1``.  For ``cell.dimension == 2``
    with ``low_dim_ft_type != 'inf_vacuum'`` the blocks stored under 'j3c-'
    are yielded afterwards with sign ``-1``.

    The arrays of one iteration are passed back as output buffers for the
    next one, so they must be consumed before advancing the generator.
    '''
    if self._cderi is None:
        self.build()
    cell = self.cell
    kpti, kptj = kpti_kptj
    unpack = is_zero(kpti-kptj) and not compact
    is_real = is_zero(kpti_kptj)
    nao = cell.nao_nr()
    if blksize is None:
        # Block size from the memory budget: 8 bytes per real number,
        # 16 bytes per complex number.
        if not is_real:
            blksize = max_memory*1e6/16/(nao**2*2)
        elif unpack:
            blksize = max_memory*1e6/8/(nao*(nao+1)//2+nao**2)
        else:
            blksize = max_memory*1e6/8/(nao*(nao+1))
        blksize = max(16, min(int(blksize), self.blockdim))
        logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory, blksize)

    def load(Lpq, b0, b1, bufR, bufI):
        # Read rows b0:b1 and split them into real and imaginary parts,
        # unpacking the triangular storage when `unpack` is set.
        block = numpy.asarray(Lpq[b0:b1])
        if is_real:
            if unpack:
                real = lib.unpack_tril(block, out=bufR).reshape(-1,nao**2)
            else:
                real = block
            return real, numpy.zeros_like(real)

        shape = block.shape
        if not unpack:
            real = numpy.ndarray(shape, buffer=bufR)
            real[:] = block.real
            imag = numpy.ndarray(shape, buffer=bufI)
            imag[:] = block.imag
            return real, imag

        # `buf` is the scratch array allocated by the enclosing loop.
        scratch = numpy.ndarray(shape, buffer=buf)
        scratch[:] = block.real
        real = lib.unpack_tril(scratch, out=bufR).reshape(-1,nao**2)
        scratch[:] = block.imag
        imag = lib.unpack_tril(scratch, lib.ANTIHERMI, out=bufI).reshape(-1,nao**2)
        return real, imag

    LpqR = LpqI = None
    with _load3c(self._cderi, 'j3c', kpti_kptj, 'j3c-kptij') as j3c:
        naux = j3c.shape[0]
        if unpack:
            buf = numpy.empty((min(blksize,naux),nao*(nao+1)//2))
        for b0, b1 in lib.prange(0, naux, blksize):
            LpqR, LpqI = load(j3c, b0, b1, LpqR, LpqI)
            yield LpqR, LpqI, 1

    if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
        # Truncated Coulomb operator is not positive definite. Load the
        # CDERI tensor of negative part.
        LpqR = LpqI = None
        with _load3c(self._cderi, 'j3c-', kpti_kptj, 'j3c-kptij',
                     ignore_key_error=True) as j3c:
            naux = j3c.shape[0]
            if unpack:
                buf = numpy.empty((min(blksize,naux),nao*(nao+1)//2))
            for b0, b1 in lib.prange(0, naux, blksize):
                LpqR, LpqI = load(j3c, b0, b1, LpqR, LpqI)
                yield LpqR, LpqI, -1
def make_kpt(kpt):  # kpt = kptj - kpti
    '''Accumulate the exchange contributions of all k-point pairs whose
    difference matches kpt into vkR / vkI.

    This is a closure: kk_table, kk_todo, log, swap_2e, max_memory, mydf,
    exxdiv, mesh, kpts, nao, nset, dmsR, dmsI, vkR and vkI come from the
    enclosing scope, which is not part of this block.  kk_todo, vkR, vkI
    and mydf.exxdiv are modified in place.
    '''
    # search for all possible ki and kj that has ki-kj+kpt=0
    kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
    kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
    nkptj = len(kptj_idx)
    log.debug1('kpt = %s', kpt)
    log.debug2('kpti_idx = %s', kpti_idx)
    log.debug2('kptj_idx = %s', kptj_idx)
    # Mark the matched pairs as done; with swap_2e the transposed pairs are
    # handled by "case 2" below and are marked as well.
    kk_todo[kpti_idx,kptj_idx] = False
    if swap_2e and not is_zero(kpt):
        kk_todo[kptj_idx,kpti_idx] = False

    max_memory1 = max_memory * (nkptj+1)/(nkptj+5)
    #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
    #bufR = numpy.empty((blksize*nao**2))
    #bufI = numpy.empty((blksize*nao**2))
    # Use DF object to mimic KRHF/KUHF object in function get_coulG
    mydf.exxdiv = exxdiv
    vkcoulG = mydf.weighted_coulG(kpt, True, mesh)
    kptjs = kpts[kptj_idx]
    # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
    #buf1R = numpy.empty((blksize*nao**2))
    #buf1I = numpy.empty((blksize*nao**2))
    for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt, kptjs, max_memory=max_memory1):
        nG = p1 - p0
        # Scratch buffers sized for this grid block; the (nao,nG,nao)
        # arrays below are views onto them.
        bufR = numpy.empty((nG*nao**2))
        bufI = numpy.empty((nG*nao**2))
        buf1R = numpy.empty((nG*nao**2))
        buf1I = numpy.empty((nG*nao**2))

        for k, aoao in enumerate(aoaoks):
            ki = kpti_idx[k]
            kj = kptj_idx[k]

            # case 1: k_pq = (pi|iq)
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = numpy.ndarray((nao,nG,nao), buffer=bufR)
            pLqI = numpy.ndarray((nao,nG,nao), buffer=bufI)
            pLqR[:] = aoao.real.reshape(nG,nao,nao).transpose(1,0,2)
            pLqI[:] = aoao.imag.reshape(nG,nao,nao).transpose(1,0,2)
            iLkR = numpy.ndarray((nao,nG,nao), buffer=buf1R)
            iLkI = numpy.ndarray((nao,nG,nao), buffer=buf1I)
            for i in range(nset):
                # NOTE(review): zdotNN/zdotNC/zdotCN presumably perform
                # complex matrix products on split real/imaginary parts,
                # writing into (or accumulating onto) the trailing output
                # arguments -- confirm against their definitions.
                zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                       dmsR[i,kj], dmsI[i,kj], 1,
                       iLkR.reshape(-1,nao), iLkI.reshape(-1,nao))
                # Scale each grid point by the weighted Coulomb kernel.
                iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                       pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                       1, vkR[i,ki], vkI[i,ki], 1)

            # case 2: k_pq = (iq|pi)
            #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                for i in range(nset):
                    zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1),
                           pLqI.reshape(nao,-1), 1,
                           iLkR.reshape(nao,-1), iLkI.reshape(nao,-1))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                           1, vkR[i,kj], vkI[i,kj], 1)
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform the density-fitted two-electron integrals to the MO basis.

    Args:
        mydf : density-fitting object; ``build()`` is called first when
            ``_cderi`` is None.
        mo_coeffs : four MO coefficient matrices, or a single 2D ndarray
            which is then used for all four indices.
        kpts : k-points of the four indices, normalised by ``_format_kpts``.
        compact : forwarded to ``_conc_mos`` in the real gamma-point branch
            only.

    Returns:
        2D array ``eri_mo`` of shape (nij_pair, nkl_pair).  It is real when
        all k-points are the gamma point and all coefficients are real,
        and complex128 otherwise.
    '''
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex128 is spelled out: the numpy.complex alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        # The second pair is built in (l, k) order and transposed below.
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        # Swap the (l, k) axes back to (k, l) order.
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        zij = zkl = None
        # The (i,j) and (k,l) tensors are read in lockstep.
        for (LpqR, LpqI), (LrsR, LrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def get_eri(mydf, kpts=None, compact=True):
    '''Assemble the AO two-electron integrals from the 3-index tensors
    delivered by ``mydf.sr_loop``.

    Args:
        mydf : density-fitting object; ``build()`` is called first when
            ``_cderi`` is None.
        kpts : k-points of the four AO indices, normalised by
            ``_format_kpts``.
        compact : only consulted at the gamma point.  If false, the packed
            result is expanded with ``ao2mo.restore`` and reshaped to
            (nao**2, nao**2).

    Returns:
        A real array at the gamma point, otherwise a complex array of shape
        (nao**2, nao**2).
    '''
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    # Leave room for the nao**4 result (8 bytes per element, in MB).
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*8/1e6)

    def new_accumulators():
        # Real and imaginary parts of the (nao**2, nao**2) result.
        return numpy.zeros((nao*nao,nao*nao)), numpy.zeros((nao*nao,nao*nao))

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        packed = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LpqR.T, LpqR, 1, packed, 1)
            LpqR = LpqI = None
        if compact:
            return packed
        return ao2mo.restore(1, packed, nao).reshape(nao**2,-1)

    if is_zero(kpti-kptk) and is_zero(kptj-kptl):
        eriR, eriI = new_accumulators()
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
        return eriR + eriI*1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        eriR, eriI = new_accumulators()
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        swapped = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao),
                                axes=(0,2,1))
        return swapped.reshape(nao**2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    eriR, eriI = new_accumulators()
    # The (i,j) and (k,l) tensors are read in lockstep.
    for (LpqR, LpqI), (LrsR, LrsI) in \
            lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                     mydf.sr_loop(kptijkl[2:], max_memory, False)):
        zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
        LpqR = LpqI = LrsR = LrsI = None
    return eriR + eriI*1j
def _ft_aopair_kpts(cell, Gv, shls_slice=None, aosym='s1', b=None, gxyz=None,
                    Gvbase=None, q=numpy.zeros(3), kptjs=numpy.zeros((1, 3)),
                    intor='GTO_ft_ovlp_sph', comp=1, out=None):
    r'''Fourier transform of AO pairs for a set of k-points

    \sum_T exp(-i k_j * T) \int exp(-i(G+q)r) i(r) j(r-T) dr^3

    The work is done by the C driver ``libpbc.PBC_ft_latsum_drv``.  The
    returned array holds the AO pairs for the k-points listed in kptjs; the
    grid axis is moved in front of the AO axes and, when ``comp == 1``, the
    component axis is removed.
    '''
    def c_ptr(array):
        return array.ctypes.data_as(ctypes.c_void_p)

    q = numpy.reshape(q, 3)
    kptjs = numpy.asarray(kptjs, order='C').reshape(-1, 3)
    nGv = Gv.shape[0]
    # Shift the plane waves by q (in-place on the transposed array).
    GvT = numpy.asarray(Gv.T, order='C')
    GvT += q.reshape(-1, 1)

    # The last test is for backward compatibility with pyscf-1.2, in which
    # the argument Gvbase is gs (a sequence of integers).
    use_general_kernel = (
        gxyz is None or b is None or Gvbase is None
        or abs(q).sum() > 1e-9
        or isinstance(Gvbase[0], (int, numpy.integer)))
    if use_general_kernel:
        p_gxyzT = lib.c_null_ptr()
        p_gs = (ctypes.c_int * 3)(0, 0, 0)
        p_b = (ctypes.c_double * 1)(0)
        eval_gz = 'GTO_Gv_general'
    else:
        off_diagonal = abs(b - numpy.diag(b.diagonal())).sum()
        eval_gz = 'GTO_Gv_orth' if off_diagonal < 1e-8 else 'GTO_Gv_nonorth'
        gxyzT = numpy.asarray(gxyz.T, order='C', dtype=numpy.int32)
        p_gxyzT = c_ptr(gxyzT)
        b = numpy.hstack((b.ravel(), q) + Gvbase)
        p_b = c_ptr(b)
        p_gs = (ctypes.c_int * 3)(*[len(axis) for axis in Gvbase])

    Ls = cell.get_lattice_Ls()
    expkL = numpy.exp(1j * numpy.dot(kptjs, Ls.T))

    # The basis is concatenated with itself; the second copy serves the j index.
    atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env,
                                 cell._atm, cell._bas, cell._env)
    ao_loc = gto.moleintor.make_loc(bas, intor)
    nbas = cell.nbas
    if shls_slice is None:
        shls_slice = (0, nbas, nbas, nbas * 2)
    else:
        ish0, ish1, jsh0, jsh1 = shls_slice[:4]
        shls_slice = (ish0, ish1, nbas + jsh0, nbas + jsh1)
    ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
    nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
    nkpts = len(kptjs)
    nimgs = len(Ls)
    shape = (nkpts, comp, ni, nj, nGv)

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering of axis-0.  It is inefficient.
    if aosym == 's1hermi':
        # Symmetry for the Gamma point
        assert (is_zero(q) and is_zero(kptjs) and ni == nj)
    elif aosym == 's2':
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        nij = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        shape = (nkpts, comp, nij, nGv)

    drv = libpbc.PBC_ft_latsum_drv
    c_intor = getattr(libpbc, intor)
    c_eval_gz = getattr(libpbc, eval_gz)
    fill_prefix = 'PBC_ft_fill_nk1' if nkpts == 1 else 'PBC_ft_fill_k'
    fill = getattr(libpbc, fill_prefix + aosym)

    out = numpy.ndarray(shape, dtype=numpy.complex128, buffer=out)

    drv(c_intor, c_eval_gz, fill, c_ptr(out),
        ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(nimgs),
        c_ptr(Ls), c_ptr(expkL),
        (ctypes.c_int * 4)(*shls_slice),
        c_ptr(ao_loc), c_ptr(GvT), p_b, p_gxyzT, p_gs, ctypes.c_int(nGv),
        c_ptr(atm), ctypes.c_int(cell.natm),
        c_ptr(bas), ctypes.c_int(cell.nbas),
        c_ptr(env))

    if aosym == 's1hermi':
        # Copy the lower triangle into the upper one.
        for row in range(1, ni):
            out[:, :, :row, row] = out[:, :, row, :row]
    out = numpy.rollaxis(out, -1, 2)
    return out[:, 0] if comp == 1 else out
def ft_aopair_kpts(cell, Gv, shls_slice=None, aosym='s1', b=None, gxyz=None,
                   Gvbase=None, q=numpy.zeros(3), kptjs=numpy.zeros((1, 3)),
                   intor='GTO_ft_ovlp', comp=1, bvk_kmesh=None, out=None):
    r'''Fourier transform of AO pairs for a group of k-points

    \sum_T exp(-i k_j * T) \int exp(-i(G+q)r) i(r) j(r-T) dr^3

    Without ``bvk_kmesh`` the lattice sum is driven by
    ``libpbc.PBC_ft_latsum_drv``; with it, by ``libpbc.PBC_ft_bvk_drv``.
    The returned array holds the AO pairs for the k-points listed in kptjs;
    the grid axis is moved in front of the AO axes and, when ``comp == 1``,
    the component axis is removed.
    '''
    def c_ptr(array):
        return array.ctypes.data_as(ctypes.c_void_p)

    intor = cell._add_suffix(intor)

    q = numpy.reshape(q, 3)
    kptjs = numpy.asarray(kptjs, order='C').reshape(-1, 3)
    Gv = numpy.asarray(Gv, order='C').reshape(-1, 3)
    nGv = Gv.shape[0]
    # Shift the plane waves by q (in-place on the transposed array).
    GvT = numpy.asarray(Gv.T, order='C')
    GvT += q.reshape(-1, 1)

    # The last test is for backward compatibility with pyscf-1.2, in which
    # the argument Gvbase is gs (a sequence of integers).
    use_general_kernel = (
        gxyz is None or b is None or Gvbase is None
        or abs(q).sum() > 1e-9
        or isinstance(Gvbase[0], (int, numpy.integer)))
    if use_general_kernel:
        p_gxyzT = lib.c_null_ptr()
        p_mesh = (ctypes.c_int * 3)(0, 0, 0)
        p_b = (ctypes.c_double * 1)(0)
        eval_gz = 'GTO_Gv_general'
    else:
        off_diagonal = abs(b - numpy.diag(b.diagonal())).sum()
        eval_gz = 'GTO_Gv_orth' if off_diagonal < 1e-8 else 'GTO_Gv_nonorth'
        gxyzT = numpy.asarray(gxyz.T, order='C', dtype=numpy.int32)
        p_gxyzT = c_ptr(gxyzT)
        b = numpy.hstack((b.ravel(), q) + Gvbase)
        p_b = c_ptr(b)
        p_mesh = (ctypes.c_int * 3)(*[len(axis) for axis in Gvbase])

    # Lattice translation vectors, sorted by increasing length.
    Ls = cell.get_lattice_Ls()
    Ls = Ls[numpy.linalg.norm(Ls, axis=1).argsort()]
    nkpts = len(kptjs)
    nimgs = len(Ls)
    nbas = cell.nbas

    use_bvk = bvk_kmesh is not None
    if use_bvk:
        ovlp_mask = _estimate_overlap(cell, Ls) > cell.precision
        ovlp_mask = numpy.asarray(ovlp_mask, dtype=numpy.int8, order='C')

        # Using Ls = translations.dot(a)
        translations = numpy.linalg.solve(cell.lattice_vectors().T, Ls.T)
        # t_mod is the translations inside the BvK cell
        kmesh_column = numpy.asarray(bvk_kmesh)[:, None]
        t_mod = translations.round(3).astype(int) % kmesh_column
        cell_loc_bvk = numpy.ravel_multi_index(
            t_mod, bvk_kmesh).astype(numpy.int32)

        bvkmesh_Ls = k2gamma.translation_vectors_for_kmesh(cell, bvk_kmesh)
        expkL = numpy.exp(1j * numpy.dot(kptjs, bvkmesh_Ls.T))
    else:
        expkL = numpy.exp(1j * numpy.dot(kptjs, Ls.T))

    # The basis is concatenated with itself; the second copy serves the j index.
    atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env,
                                 cell._atm, cell._bas, cell._env)
    ao_loc = gto.moleintor.make_loc(bas, intor)
    if shls_slice is None:
        shls_slice = (0, nbas, nbas, nbas * 2)
    else:
        ish0, ish1, jsh0, jsh1 = shls_slice[:4]
        shls_slice = (ish0, ish1, nbas + jsh0, nbas + jsh1)
    ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
    nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
    shape = (nkpts, comp, ni, nj, nGv)

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering of axis-0.  It is inefficient.
    if aosym == 's1hermi':
        # Symmetry for the Gamma point
        assert (is_zero(q) and is_zero(kptjs) and ni == nj)
    elif aosym == 's2':
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        nij = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        shape = (nkpts, comp, nij, nGv)

    cintor = getattr(libpbc, intor)
    c_eval_gz = getattr(libpbc, eval_gz)

    out = numpy.ndarray(shape, dtype=numpy.complex128, buffer=out)

    # Argument groups shared by both C drivers, in call order.
    leading = (c_ptr(out), ctypes.c_int(nkpts), ctypes.c_int(comp),
               ctypes.c_int(nimgs))
    lattice = (c_ptr(Ls), c_ptr(expkL), (ctypes.c_int * 4)(*shls_slice),
               c_ptr(ao_loc))
    trailing = (c_ptr(GvT), p_b, p_gxyzT, p_mesh, ctypes.c_int(nGv),
                c_ptr(atm), ctypes.c_int(cell.natm),
                c_ptr(bas), ctypes.c_int(cell.nbas),
                c_ptr(env))

    if not use_bvk:
        fill_prefix = 'PBC_ft_fill_nk1' if nkpts == 1 else 'PBC_ft_fill_k'
        fill = getattr(libpbc, fill_prefix + aosym)
        drv = libpbc.PBC_ft_latsum_drv
        drv(*((cintor, c_eval_gz, fill) + leading + lattice + trailing))
    else:
        fill_prefix = 'PBC_ft_bvk_nk1' if nkpts == 1 else 'PBC_ft_bvk_k'
        fill = getattr(libpbc, fill_prefix + aosym)
        drv = libpbc.PBC_ft_bvk_drv
        bvk_count = (ctypes.c_int(expkL.shape[1]),)
        bvk_maps = (c_ptr(cell_loc_bvk), c_ptr(ovlp_mask))
        drv(*((cintor, c_eval_gz, fill) + leading + bvk_count
              + lattice + bvk_maps + trailing))

    if aosym == 's1hermi':
        # Copy the lower triangle into the upper one.
        for row in range(1, ni):
            out[:, :, :row, row] = out[:, :, row, :row]
    out = numpy.rollaxis(out, -1, 2)
    return out[:, 0] if comp == 1 else out
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    '''Assemble AO two-electron integrals from the signed 3-index tensors
    produced by ``mydf.sr_loop``.

    Args:
        mydf : density-fitting object.  ``mydf.build()`` is invoked first
            when ``mydf._cderi`` is None.
        kpts : k-point specification; ``_format_kpts`` turns it into the four
            k-points (ki, kj, kk, kl).
        compact : only consulted at the gamma point.  When false the packed
            result is expanded through ``ao2mo.restore(1, ...)``.

    Returns:
        * zeros of shape (nao, nao, nao, nao) plus a logged warning when
          ``_iskconserv`` reports that the k-points do not conserve momentum;
        * at the gamma point a real array, (nao_pair, nao_pair) when
          ``compact`` is true and (nao**2, nao**2) otherwise;
        * a complex (nao**2, nao**2) array in every other case.
    '''
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    n_ao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((n_ao, n_ao, n_ao, n_ao))

    kpt_i, kpt_j, kpt_k, kpt_l = kptijkl
    kpts_ij = kptijkl[:2]
    n_sq = n_ao * n_ao
    max_memory = max(
        2000, mydf.max_memory-lib.current_memory()[0]-n_ao**4*16/1e6)

    def new_accumulators():
        # Separate real and imaginary accumulators for the complex cases.
        return numpy.zeros((n_sq, n_sq)), numpy.zeros((n_sq, n_sq))

    ####################
    # Gamma point: the integrals are real and carry s4 symmetry.
    if gamma_point(kptijkl):
        n_pair = n_ao * (n_ao+1) // 2
        eri_real = numpy.zeros((n_pair, n_pair))
        for blk_re, blk_im, sign in mydf.sr_loop(kpts_ij, max_memory, True):
            lib.ddot(blk_re.T, blk_re, sign, eri_real, 1)
            blk_re = blk_im = None  # drop the references to the block
        if compact:
            return eri_real
        return ao2mo.restore(1, eri_real, n_ao).reshape(n_ao**2, -1)

    # ki == kk and kj == kl: both pair densities come from the same tensor.
    if is_zero(kpt_i-kpt_k) and is_zero(kpt_j-kpt_l):
        acc_re, acc_im = new_accumulators()
        for blk_re, blk_im, sign in mydf.sr_loop(kpts_ij, max_memory, False):
            zdotNN(blk_re.T, blk_im.T, blk_re, blk_im, sign,
                   acc_re, acc_im, 1)
            blk_re = blk_im = None
        return acc_re + acc_im*1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero.  It corresponds to the exchange integral.
    #
    # Complex integrals, N^4 elements.
    if is_zero(kpt_i-kpt_l) and is_zero(kpt_j-kpt_k):
        acc_re, acc_im = new_accumulators()
        for blk_re, blk_im, sign in mydf.sr_loop(kpts_ij, max_memory, False):
            zdotNC(blk_re.T, blk_im.T, blk_re, blk_im, sign,
                   acc_re, acc_im, 1)
            blk_re = blk_im = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        stacked = (acc_re+acc_im*1j).reshape(-1, n_ao, n_ao)
        swapped = lib.transpose(stacked, axes=(0, 2, 1))
        return swapped.reshape(n_ao**2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be a multiple of 2pi because of
    # the wave vector symmetry.  k is a fraction of the reciprocal basis,
    # 0 < k/b < 1, by definition.  So kptl/b - kptk/b must be -1 < k/b < 1.
    acc_re, acc_im = new_accumulators()
    # Both loops get the same block size so their blocks pair up.
    blksize = int(max_memory*.4e6/16/n_ao**2)
    paired_blocks = lib.izip(
        mydf.sr_loop(kpts_ij, max_memory, False, blksize),
        mydf.sr_loop(kptijkl[2:], max_memory, False, blksize))
    for (pq_re, pq_im, sign), (rs_re, rs_im, sign1) in paired_blocks:
        zdotNN(pq_re.T, pq_im.T, rs_re, rs_im, sign, acc_re, acc_im, 1)
        pq_re = pq_im = rs_re = rs_im = None
    return acc_re + acc_im*1j
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Finish the 3-index tensors of every k-point pair whose difference
    kptj - kpti equals ``uniq_kpts[uniq_kptji_id]`` and store them in feri.

    This function reads many names from an enclosing scope that is not
    shown here (uniq_kpts, uniq_inverse, kptjs, auxcell, fused_cell, Gv, b,
    gxyz, Gvbase, mesh, nao, naux, ngrids, nsegs, fuse, feri, fswap, log,
    mydf, cell) -- presumably a closure of a j3c builder; verify against the
    caller.

    Args:
        uniq_kptji_id : index into uniq_kpts.
        cholesky_j2c : triple (j2c, j2c_negative, j2ctag).  When j2ctag is
            'CD', j2c is used in a lower-triangular solve; otherwise j2c is
            applied with a matrix product.  j2c_negative may be None.

    Side effects:
        Writes datasets 'j3c/<ji>/<istep>' (and 'j3c-/<ji>/<istep>' when
        j2c_negative is not None) to feri, and deletes 'j3c-junk/<ji>' from
        fswap.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    log.debug1('kpt = %s', kpt)
    # Indices of the k-point pairs that share this kpt difference.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # Fourier transform of the shells auxcell.nbas .. fused_cell.nbas of
    # fused_cell, scaled by the weighted Coulomb kernel.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1,1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        if cell.dimension == 3:
            # vbar and ovlp are only defined (and only used below) for 3D
            # cells with kpt == 0.
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory-mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    # Scratch buffers reused for every grid block.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

    def pw_contract(istep, sh_range, j3cR, j3cI):
        # Subtract the plane-wave contribution from rows naux: of j3cR/j3cI
        # (in place), then fuse, apply j2c and write the result to feri.
        bstart, bend, ncol = sh_range
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                # j3cI[k] is None for kpt == 0 at a gamma-point kptj.
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d'%(ji,istep)] = v
            else:
                feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

    # pw_contract is invoked through lib.call_in_background; presumably it
    # runs asynchronously while the next segment is read below -- confirm
    # against lib.call_in_background.
    with lib.call_in_background(pw_contract) as compute:
        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                # Gather columns col0:col1 of this k-point pair from all
                # nsegs segments stored in the swap file.
                v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                  for i in range(nsegs)])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
            compute(istep, sh_range, j3cR, j3cI)

    # The intermediate swap data is no longer needed.
    for ji in adapted_ji_idx:
        del(fswap['j3c-junk/%d'%ji])
def aux_e2(cell, auxcell, intor='int3c2e', aosym='s1', comp=None,
           kptij_lst=numpy.zeros((1, 2, 3)), shls_slice=None, **kwargs):
    r'''3-center AO integrals (ij|L) with double lattice sum:
    \sum_{lm} (i[l]j[m]|L[0]), where L is the auxiliary basis.

    The integrals are evaluated by the callable returned from
    ``wrap_int3c``; extra keyword arguments are forwarded to it.

    Returns:
        (nao_pair, naux) array.  Leading axes for the k-point pairs and the
        integral components are kept only when there is more than one
        k-point pair / component.
    '''
    # NOTE: an experiment that slightly decontracted the basis up front (to
    # exploit better locality in the lattice sum, via
    # pbcgto.cell._split_basis, followed by contracting the result back) is
    # disabled: for some unknown reason the pre-decontracted basis was slower.
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor),
                                                    comp)

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)
    ish0, ish1, jsh0, jsh1, ksh0, ksh1 = shls_slice[:6]

    ao_loc = cell.ao_loc_nr()
    aux_cart = auxcell.cart or 'ssc' in intor
    aux_loc = auxcell.ao_loc_nr(aux_cart)[:ksh1 + 1]
    ni = ao_loc[ish1] - ao_loc[ish0]
    nj = ao_loc[jsh1] - ao_loc[jsh0]
    naux = aux_loc[ksh1] - aux_loc[ksh0]

    nkptij = len(kptij_lst)
    j_only = is_zero(kptij_lst[:, 0] - kptij_lst[:, 1])

    if j_only and aosym[:2] == 's2':
        # Packed lower triangle: the j range has to start at shell 0.
        assert (jsh0 == 0)
        hi, lo = ao_loc[ish1], ao_loc[ish0]
        nao_pair = hi * (hi + 1) // 2 - lo * (lo + 1) // 2
    else:
        nao_pair = ni * nj

    dtype = numpy.double if gamma_point(kptij_lst) else numpy.complex128

    int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst, **kwargs)
    out = int3c(shls_slice,
                numpy.empty((nkptij, comp, nao_pair, naux), dtype=dtype))

    # Drop the singleton component / k-point axes.
    if comp == 1:
        out = out[:, 0]
    if nkptij == 1:
        out = out[0]
    return out
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Transform the density-fitted two-electron integrals to the MO basis.

    Args:
        mydf : density-fitting object.  ``mydf.build()`` is invoked first
            when ``mydf._cderi`` is None.
        mo_coeffs : either one 2D ndarray (used for all four indices) or a
            sequence of four coefficient matrices.
        kpts : k-point specification; ``_format_kpts`` turns it into the four
            k-points (ki, kj, kk, kl).
        compact : forwarded to ``_conc_mos`` in the real gamma-point branch
            only.

    Returns:
        A 2D array of MO integrals with shape (nij_pair, nkl_pair): real in
        the gamma-point branch with real orbitals, complex128 otherwise.
        When ``_iskconserv`` reports that momentum is not conserved, a
        warning is logged and zeros with one axis per orbital set
        (``mo.shape[1]`` each) are returned.
    '''
    warn_pbc2d_eri(mydf)
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, sign, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex128 rather than the numpy.complex alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        zij = zkl = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))
        zij = zlk = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The pairs were built in (l, k) order; swap them back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        nao = mo_coeffs[0].shape[0]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        # Both loops get the same block size so their blocks pair up.
        blksize = int(min(max_memory*.3e6/16/nij_pair,
                          max_memory*.3e6/16/nkl_pair,
                          max_memory*.3e6/16/nao**2))
        zij = zkl = None
        for (LpqR, LpqI, sign), (LrsR, LrsI, _sign_rs) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None,
               exxdiv=None):
    '''Get the exchange (K) AO matrices at sampled k-points.

    Args:
        dm_kpts : (nkpts, nao, nao) ndarray
            Density matrix at each k-point.  When it carries ``mo_coeff``
            and ``mo_occ`` attributes and holds a single density-matrix set,
            the occupied orbitals are used in place of the density matrix.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        hermi : int
            Whether K matrix is hermitian

            | 0 : not hermitian and not symmetric
            | 1 : hermitian

            (Not read by this implementation.)
        kpts_band : (3,) ndarray or (*,3) ndarray
            A list of arbitrary "band" k-points at which to evaluate the
            matrix.
        exxdiv : str or None
            Passed to ``tools.get_coulG`` unless it is 'ewald' or None, in
            which case False is passed; for 'ewald' the G=0 term is added at
            the end by ``_ewald_exxdiv_for_G0``.

    Returns:
        vk : the exchange matrices accumulated in a
        (nset, nband, nao, nao) array and post-processed by ``_format_jks``.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords
    ao2_kpts = [np.asarray(ao.T, order='C')
                for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [np.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)]

    if mo_coeff is not None and nset == 1:
        # Occupied orbitals scaled by sqrt(occ), evaluated on the grid.
        mo_coeff = [mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
                    for k, occ in enumerate(mo_occ)]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # max_memory already excludes the memory in use; the original subtracted
    # mem_now a second time here, shrinking the blocks for no reason.
    blksize = int(min(nao, max(1, max_memory * 1e6 / 16 / 4 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s  blksize %d',
                  max_memory, blksize)
    #ao1_dtype = np.result_type(*ao1_kpts)
    #ao2_dtype = np.result_type(*ao2_kpts)
    vR_dm = np.empty((nset, nao, ngrids), dtype=vk_kpts.dtype)

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, exxdiv, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process the AO index of ao1T in blocks to bound memory.
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T)
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1)

    # Function _ewald_exxdiv_for_G0 to add back in the G=0 component to vk_kpts
    # Note in the _ewald_exxdiv_for_G0 implementation, the G=0 treatments are
    # different for 1D/2D and 3D systems.  The special treatments for 1D and 2D
    # can only be used with AFTDF/GDF/MDF method.  In the FFTDF method, 1D, 2D
    # and 3D should use the ewald probe charge correction.
    if exxdiv == 'ewald':
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Finish the 3-index tensors of every k-point pair whose difference
    kptj - kpti equals ``uniq_kpts[uniq_kptji_id]`` and write them back to
    feri.

    This function reads many names from an enclosing scope that is not
    shown here (uniq_kpts, uniq_inverse, kptjs, auxcell, fused_cell, Gv, b,
    gxyz, Gvbase, gs, ngs, nao, naux, fuse, feri, log, mydf, cell) --
    presumably a closure of a j3c builder; verify against the caller.

    Side effects:
        Updates rows ``:naux0`` of each 'j3c/<ji>' dataset in feri column
        block by column block, then replaces the dataset by those rows, and
        deletes 'j2c/<uniq_kptji_id>'.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # Indices of the k-point pairs that share this kpt difference.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # Fourier transform of the shells auxcell.nbas .. fused_cell.nbas of
    # fused_cell, scaled by the weighted Coulomb kernel.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1,1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d'%uniq_kptji_id])
    try:
        j2c = scipy.linalg.cholesky(j2c, lower=True)
        j2ctag = 'CD'
    except scipy.linalg.LinAlgError:
        # The metric is not positive definite (an earlier revision raised an
        # error here, hinting that gs may be insufficient).  Fall back to an
        # eigen-decomposition and drop the near-linearly-dependent vectors.
        w, v = scipy.linalg.eigh(j2c)
        log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
        log.debug('cond = %.4g, drop %d bfns',
                  w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
        v = v[:,w>mydf.linear_dep_threshold].T.conj()
        v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
        j2c = v
        j2ctag = 'eig'
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory-mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngs, 16384)
    # Scratch buffers reused for every grid block.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1+ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
        v = None

        # For 's1' the pair block must span every j shell; otherwise the
        # FT block has fewer than ncol columns and the reshape below fails
        # for any non-final shell range.  Only the packed 's2' layout stops
        # the j range at bend.
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                # j3cI[k] is None for kpt == 0 at a gamma-point kptj.
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d'%ji][:naux0,col0:col1] = v

    del(feri['j2c/%d'%uniq_kptji_id])
    # Keep only the first naux0 rows of every dataset.
    for k, ji in enumerate(adapted_ji_idx):
        v = feri['j3c/%d'%ji][:naux0]
        del(feri['j3c/%d'%ji])
        feri['j3c/%d'%ji] = v
def get_k_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1, 3)), kpts_band=None,
                  exxdiv=None):
    '''Derivatives of exchange (K) AO matrix at sampled k-points.

    Args:
        dm_kpts : density matrices at each k-point.  When it carries
            ``mo_coeff`` and ``mo_occ`` attributes and holds a single
            density-matrix set, the occupied orbitals are used in place of
            the density matrix.
        kpts : (nkpts, 3) ndarray
        kpts_band : optional "band" k-points at which to evaluate the matrix.
        exxdiv : passed to ``tools.get_coulG`` unless it is 'ewald' or None,
            in which case False is passed.

    Returns:
        ndarray built from the three derivative components, each accumulated
        in a (nset, nband, nao, nao) array and post-processed by
        ``_format_jks``.

    Raises:
        NotImplementedError : when ``exxdiv == 'ewald'`` and
            ``cell.omega != 0`` (range-separated Coulomb).
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords

    if input_band is None:
        # One deriv=1 evaluation serves both sides: component 0 is used as
        # the plain AO values, components 1: as the derivatives.
        ao2_kpts = [np.asarray(ao.transpose(0, 2, 1), order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts, deriv=1)]
        ao1_kpts = ao2_kpts
        ao2_kpts = [ao2_kpt[0] for ao2_kpt in ao2_kpts]
    else:
        ao2_kpts = [np.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
        ao1_kpts = [np.asarray(ao.transpose(0, 2, 1), order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band, deriv=1)]

    if mo_coeff is not None and nset == 1:
        # Occupied orbitals scaled by sqrt(occ), evaluated on the grid.
        mo_coeff = [mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
                    for k, occ in enumerate(mo_occ)]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # max_memory already excludes the memory in use; the original subtracted
    # mem_now a second time here, shrinking the blocks for no reason.
    blksize = int(min(nao, max(1, max_memory * 1e6 / 16 / 4 / 3 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s  blksize %d',
                  max_memory, blksize)
    vR_dm = np.empty((3, nset, nao, ngrids), dtype=vk_kpts.dtype)

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, exxdiv, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process the AO index of ao1T in blocks to bound memory.
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('aig,jg->aijg',
                                 ao1T[1:, p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(3, p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('aijg,jg->aig', vR, ao_dms[i],
                              out=vR_dm[:, i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[:, i, k1] -= weight * np.einsum(
                    'aig,jg->aij', vR_dm[:, i], ao1T[0])
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1)

    # Ewald correction has no contribution to nuclear gradient unless range
    # separated Coulomb is used.
    # The gradient correction part is not added in the vk matrix.
    if exxdiv == 'ewald' and cell.omega != 0:
        # when cell.omega != 0 the madelung constant has a non-zero derivative
        raise NotImplementedError("Range Separated Coulomb")
    vk_kpts = np.asarray(
        [_format_jks(vk, dm_kpts, input_band, kpts) for vk in vk_kpts])
    return vk_kpts
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the 3-index integrals for all k-point pairs and write them to
    ``cderi_file``.

    Args:
        mydf : density-fitting object (reads ``stdout``, ``verbose``,
            ``max_memory``, ``mesh``, ``linear_dep_threshold`` and calls
            ``weighted_coulG`` / ``auxbar``).
        cell : the AO cell.
        auxcell : the auxiliary-basis cell.
        kptij_lst : array indexed as ``[:, 0]`` (kpti) and ``[:, 1]`` (kptj).
        cderi_file : path of the HDF5 file that receives ``j3c-kptij``,
            ``j3c/<pair>/<step>`` and, for low-dimension systems,
            ``j3c-/<pair>/<step>``.

    Intermediates are kept in a temporary HDF5 swap file created in the same
    directory as ``cderi_file``.
    '''
    # time.clock() was removed in Python 3.8; use the CPU/wall clocks that
    # replaced it.
    t1 = (time.process_time(), time.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # Create swap file to avoid huge cderi_file. see also function
    # pyscf.pbc.df.df._make_j3c
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        LkR = numpy.asarray(aoaux.real, order='C')
        LkI = numpy.asarray(aoaux.imag, order='C')

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(LkR * coulG, LkR.T)
            j2c_k -= lib.dot(LkI * coulG, LkI.T)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(LkR * coulG, LkI * coulG, LkR.T, LkI.T)
            j2c_k -= j2cR + j2cI * 1j
        fswap['j2c/%d' % k] = j2c_k
        # Drop references to the large temporaries before the next k-point.
        aoaux = LkR = LkI = j2cR = j2cI = coulG = None
    j2c = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Return (j2c, j2c_negative, j2ctag) for one unique k-point, using
        the eigen-decomposition of the stored metric.'''
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        # Note large difference may be found in results between the CD/eig
        # treatments.  In some systems, small integral errors can lead to
        # different treatments of linear dependency which can be observed in
        # the total energy/orbital energy around 4th decimal place.
        # The Cholesky (CD) treatment is abandoned here for better numerical
        # stability.
        w, v = scipy.linalg.eigh(j2c)
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  uniq_kptji_id, w[-1] / w[0],
                  numpy.count_nonzero(w < mydf.linear_dep_threshold))
        v1 = v[:, w > mydf.linear_dep_threshold].T.conj()
        v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
        j2c = v1
        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
            idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
            if len(idx) > 0:
                j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
        j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    nsegs = len(fswap['j3c-junk/0'])

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Contract and store the 3-index integrals of every k-point pair
        whose difference kptj - kpti equals uniq_kpts[uniq_kptji_id].'''
        # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            '''Subtract the plane-wave part from j3cR/j3cI in place, apply
            the metric factor and write the result to feri.'''
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1 + ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = [
                        fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                        for i in range(nsegs)
                    ]
                    v = fuse(numpy.vstack(v))
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        '''Complex-conjugate the metric factors, keeping the tag.'''
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2 * numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # h5py >= 3 opens files read-only when no mode is given; 'a' is the
    # default of the older h5py releases this code was written against.
    # The context manager closes the file even if a step below raises.
    with h5py.File(cderi_file, 'a') as feri:
        feri['j3c-kptij'] = kptij_lst

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def make_kpt(kpt):  # kpt = kptj - kpti
    '''Accumulate the exchange contributions of every (ki, kj) pair whose
    entry in kk_table cancels ``kpt``.

    This is a closure: kk_table, kk_todo, swap_2e, max_memory, exxdiv, kpts,
    nao, nset, dmsR, dmsI, vkR, vkI, mydf and log are free variables taken
    from the enclosing scope (not visible in this block).

    Side effects: marks the handled pairs as False in kk_todo, overwrites
    ``mydf.exxdiv`` and passes slices of vkR/vkI as the output arguments of
    zdotNC / zdotCN.  Returns None.
    '''
    # search for all possible ki and kj that has ki-kj+kpt=0
    kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
    kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
    nkptj = len(kptj_idx)
    log.debug1('kpt = %s', kpt)
    log.debug2('kpti_idx = %s', kpti_idx)
    log.debug2('kptj_idx = %s', kptj_idx)
    # Mark these pairs as handled so later calls skip them.
    kk_todo[kpti_idx, kptj_idx] = False
    if swap_2e and not is_zero(kpt):
        # The transposed pairs are covered by "case 2" below.
        kk_todo[kptj_idx, kpti_idx] = False

    max_memory1 = max_memory * (nkptj + 1) / (nkptj + 5)
    #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
    #bufR = numpy.empty((blksize*nao**2))
    #bufI = numpy.empty((blksize*nao**2))
    # Use DF object to mimic KRHF/KUHF object in function get_coulG
    mydf.exxdiv = exxdiv
    vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
    kptjs = kpts[kptj_idx]
    # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
    #buf1R = numpy.empty((blksize*nao**2))
    #buf1I = numpy.empty((blksize*nao**2))
    for aoaoks, p0, p1 in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
        # sqrt of the kernel: it is applied to one factor (pLq) which then
        # appears twice in the contractions below.
        coulG = numpy.sqrt(vkcoulG[p0:p1])
        nG = p1 - p0
        # Work buffers sized for the current block of nG grid points.
        bufR = numpy.empty((nG * nao**2))
        bufI = numpy.empty((nG * nao**2))
        buf1R = numpy.empty((nG * nao**2))
        buf1I = numpy.empty((nG * nao**2))

        for k, aoao in enumerate(aoaoks):
            ki = kpti_idx[k]
            kj = kptj_idx[k]

            # case 1: k_pq = (pi|iq)
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            # Views on the preallocated buffers, laid out as (p, L, q).
            pLqR = numpy.ndarray((nao, nG, nao), buffer=bufR)
            pLqI = numpy.ndarray((nao, nG, nao), buffer=bufI)
            pLqR[:] = aoao.real.reshape(nG, nao, nao).transpose(1, 0, 2)
            pLqI[:] = aoao.imag.reshape(nG, nao, nao).transpose(1, 0, 2)
            pLqR *= coulG.reshape(1, nG, 1)
            pLqI *= coulG.reshape(1, nG, 1)
            iLkR = numpy.ndarray((nao * nG, nao), buffer=buf1R)
            iLkI = numpy.ndarray((nao * nG, nao), buffer=buf1I)
            for i in range(nset):
                iLkR, iLkI = zdotNN(pLqR.reshape(-1, nao), pLqI.reshape(-1, nao),
                                    dmsR[i, kj], dmsI[i, kj], 1, iLkR, iLkI)
                # presumably accumulates into vkR/vkI (trailing 1 looks like
                # a beta factor) - confirm against zdotNC.
                zdotNC(iLkR.reshape(nao, -1), iLkI.reshape(nao, -1),
                       pLqR.reshape(nao, -1).T, pLqI.reshape(nao, -1).T,
                       1, vkR[i, ki], vkI[i, ki], 1)

            # case 2: k_pq = (iq|pi)
            #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                iLkR = iLkR.reshape(nao, -1)
                iLkI = iLkI.reshape(nao, -1)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(dmsR[i, ki], dmsI[i, ki],
                                        pLqR.reshape(nao, -1), pLqI.reshape(nao, -1),
                                        1, iLkR, iLkI)
                    zdotCN(
                        pLqR.reshape(-1, nao).T, pLqI.reshape(-1, nao).T,
                        iLkR.reshape(-1, nao), iLkI.reshape(-1, nao),
                        1, vkR[i, kj], vkI[i, kj], 1)
def get_mo_pairs_G(mydf, mo_coeffs, kpts=numpy.zeros((2, 3)), q=None,
                   compact=getattr(__config__, 'pbc_df_mo_pairs_compact', False)):
    '''Calculate forward (G|ij) FFT of all MO pairs.

    Args:
        mo_coeffs : length-2 list of (nao,nmo) ndarrays
            The two sets of MO coefficients to use in calculating the
            product |ij).
        kpts : the two k-points of the pair; None is treated as two gamma
            points.
        q : phase wave vector, only used when the two k-points differ;
            defaults to ``kpts[1] - kpts[0]``.
        compact : at the gamma point with identical coefficient sets, keep
            only the i >= j pairs.

    Returns:
        mo_pairs_G : (ngrids, nmoi*nmoj) ndarray
            The FFT of the real-space MO pairs; (ngrids, nmoi*(nmoi+1)//2)
            in the compact case.
    '''
    if kpts is None:
        kpts = numpy.zeros((2, 3))
    kpts = numpy.asarray(kpts)

    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = len(coords)
    coeff_i, coeff_j = mo_coeffs[0], mo_coeffs[1]
    nmoi = coeff_i.shape[1]
    nmoj = coeff_j.shape[1]

    def mo_on_grid(coeff, ao):
        # MO values on the grid, one orbital per row.
        return numpy.asarray(lib.dot(coeff.T, ao.T), order='C')

    def pair_fft(aoi, aoj, fac=1):
        # FFT of fac * conj(mo_i) * mo_j for every (i, j) pair.
        moi = mo_on_grid(coeff_i, aoi)
        if aoi is aoj and iden_coeffs(coeff_i, coeff_j):
            moj = moi
        else:
            moj = mo_on_grid(coeff_j, aoj)
        pairs = numpy.empty((nmoi, nmoj, ngrids), dtype=numpy.complex128)
        for i, orb in enumerate(moi):
            pairs[i] = tools.fft(fac * orb.conj() * moj, mesh)
        return pairs.reshape(-1, ngrids).T

    if gamma_point(kpts):
        # gamma point, real
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        if not (compact and iden_coeffs(coeff_i, coeff_j)):
            return pair_fft(ao, ao)

        mo = mo_on_grid(coeff_i, ao)
        npair = nmoi * (nmoi + 1) // 2
        packed = numpy.empty((npair, ngrids), dtype=numpy.complex128)
        for i in range(nmoi):
            # Row offset of the i-th lower-triangular row.
            row0 = i * (i + 1) // 2
            packed[row0:row0 + i + 1] = tools.fft(mo[i].conj() * mo[:i + 1], mesh)
        return packed.T

    if is_zero(kpts[0] - kpts[1]):
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        return pair_fft(ao, ao)

    if q is None:
        q = kpts[1] - kpts[0]
    aoi, aoj = mydf._numint.eval_ao(cell, coords, kpts)
    fac = numpy.exp(-1j * numpy.dot(coords, q))
    return pair_fft(aoi, aoj, fac)
def wrap_int3c(cell, auxcell, intor='int3c2e_sph', aosym='s1', comp=1,
               kptij_lst=numpy.zeros((1, 2, 3))):
    '''Prepare the arguments of ``libpbc.PBCnr3c_drv`` and return a closure
    ``int3c(shls_slice, out)`` that runs the driver into ``out`` and
    returns ``out``.

    The concatenated environment holds the shells of ``cell`` twice followed
    by the shells of ``auxcell``; the closure shifts the incoming shell
    slice accordingly.
    '''
    nbas = cell.nbas
    atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env,
                                 cell._atm, cell._bas, cell._env)
    pair_loc = gto.moleintor.make_loc(bas, intor)
    aux_loc = auxcell.ao_loc_nr('ssc' in intor)
    # AO offsets of the aux shells continue after the two copies of cell.
    ao_loc = numpy.asarray(numpy.hstack([pair_loc, pair_loc[-1] + aux_loc[1:]]),
                           dtype=numpy.int32)
    atm, bas, env = gto.conc_env(atm, bas, env,
                                 auxcell._atm, auxcell._bas, auxcell._env)
    Ls = cell.get_lattice_Ls()
    nimgs = len(Ls)

    kpti = kptij_lst[:, 0]
    kptj = kptij_lst[:, 1]
    if gamma_point(kptij_lst):
        kk_type = 'g'
        nkpts = nkptij = 1
        kptij_idx = numpy.array([0], dtype=numpy.int32)
        expkL = numpy.ones(1)
    elif is_zero(kpti - kptj):  # j_only
        kk_type = 'k'
        kpts = kptij_idx = numpy.asarray(kpti, order='C')
        expkL = numpy.exp(1j * numpy.dot(kpts, Ls.T))
        nkpts = nkptij = len(kpts)
    else:
        kk_type = 'kk'
        kpts = unique(numpy.vstack([kpti, kptj]))[0]
        expkL = numpy.exp(1j * numpy.dot(kpts, Ls.T))
        # Position of every kpti / kptj inside the unique k-point list.
        dist_i = abs(kpti.reshape(-1, 1, 3) - kpts).sum(axis=2)
        dist_j = abs(kptj.reshape(-1, 1, 3) - kpts).sum(axis=2)
        wherei = numpy.where(dist_i < KPT_DIFF_TOL)[1]
        wherej = numpy.where(dist_j < KPT_DIFF_TOL)[1]
        nkpts = len(kpts)
        kptij_idx = numpy.asarray(wherei * nkpts + wherej, dtype=numpy.int32)
        nkptij = len(kptij_lst)

    fill = 'PBCnr3c_fill_%s%s' % (kk_type, aosym[:2])
    drv = libpbc.PBCnr3c_drv
    cintopt = _vhf.make_cintopt(atm, bas, env, intor)
    # Remove the precomputed pair data because the pair data corresponds to the
    # integral of cell #0 while the lattice sum moves shls to all repeated images.
    libpbc.CINTdel_pairdata_optimizer(cintopt)

    def as_void_p(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    # Shell offsets of the three blocks (cell, cell, auxcell) in `bas`.
    shell_shift = (0, 0, nbas, nbas, nbas * 2, nbas * 2)

    def int3c(shls_slice, out):
        shifted = [shell_shift[i] + shls_slice[i] for i in range(6)]
        drv(getattr(libpbc, intor), getattr(libpbc, fill),
            as_void_p(out),
            ctypes.c_int(nkptij), ctypes.c_int(nkpts),
            ctypes.c_int(comp), ctypes.c_int(nimgs),
            as_void_p(Ls),
            as_void_p(expkL),
            as_void_p(kptij_idx),
            (ctypes.c_int * 6)(*shifted),
            as_void_p(ao_loc), cintopt,
            as_void_p(atm), ctypes.c_int(cell.natm),
            as_void_p(bas),
            ctypes.c_int(nbas),  # need to pass cell.nbas to libpbc.PBCnr3c_drv
            as_void_p(env))
        return out

    return int3c
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Contract and store the 3-index integrals of every k-point pair whose
    difference kptj - kpti equals ``uniq_kpts[uniq_kptji_id]``.

    This is a closure: uniq_kpts, uniq_inverse, kptjs, fused_cell, fuse, Gv,
    Gvbase, gxyz, b, mesh, ngrids, nao, naux, nsegs, cell, mydf, fswap, feri
    and log are free variables taken from the enclosing scope (not visible
    in this block).

    Side effects: writes ``j3c/<pair>/<step>`` datasets to feri and deletes
    the consumed ``j3c-junk/<pair>`` groups from fswap.  Returns None.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # All (i, j) pairs sharing this k-point difference.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # Coulomb-weighted Fourier transform of the (fused) auxiliary basis,
    # split into real and imaginary parts.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, mesh)
    kLR = Gaux.T.real.copy('C')
    kLI = Gaux.T.imag.copy('C')
    j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
    # Note large difference may be found in results between the CD/eig treatments.
    # In some systems, small integral errors can lead to different treatments of
    # linear dependency which can be observed in the total energy/orbital energy
    # around 4th decimal place.
    #        try:
    #            j2c = scipy.linalg.cholesky(j2c, lower=True)
    #            j2ctag = 'CD'
    #        except scipy.linalg.LinAlgError as e:
    #
    # Abandon CD treatment for better numerical stablity
    w, v = scipy.linalg.eigh(j2c)
    log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
              uniq_kptji_id, w[-1] / w[0],
              numpy.count_nonzero(w < mydf.linear_dep_threshold))
    # Keep eigenvectors above the threshold, scaled by 1/sqrt(eigenvalue).
    v = v[:, w > mydf.linear_dep_threshold].T.conj()
    v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
    j2c = v
    j2ctag = 'eig'
    # NOTE(review): naux0 is not used anywhere else in this block.
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                 nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    def pw_contract(istep, sh_range, j3cR, j3cI):
        # Subtracts the plane-wave part from j3cR/j3cI in place, then
        # applies j2c and stores the result in feri.
        bstart, bend, ncol = sh_range
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                # Views on the shared scratch buffers.
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                # The imaginary part is skipped for gamma-point pairs,
                # where j3cI[k] is None.
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            # j2ctag is always 'eig' in this block, so the else branch runs.
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d/%d' % (ji, istep)] = v

    # pw_contract is handed to lib.call_in_background; presumably it runs
    # asynchronously while the next slice is loaded - confirm against lib.
    with lib.call_in_background(pw_contract) as compute:
        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = [
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ]
                v = fuse(numpy.vstack(v))
                if is_zero(kpt) and cell.dimension == 3:
                    # Subtract vbar * overlap for every non-zero vbar entry.
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            compute(istep, sh_range, j3cR, j3cI)
    # The temporary 3c2e data of these pairs is no longer needed.
    for ji in adapted_ji_idx:
        del (fswap['j3c-junk/%d' % ji])
def get_eri(mydf, kpts=None, compact=True):
    '''Assemble the two-electron integrals for four k-points from the
    blocks yielded by ``mydf.pw_loop``.

    Three cases are handled:
      * all k-points at gamma: real integrals in packed (s4) pair storage,
        unpacked to (nao**2, nao**2) when ``compact`` is False;
      * kpti == kptl and kptj == kptk: complex (nao**2, nao**2) array;
      * otherwise: complex (nao**2, nao**2) array built from two
        simultaneous pw_loop iterations.
    '''
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    nao2 = nao**2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair, nao_pair))
        blocks = mydf.pw_loop(mydf.gs, kptijkl[:2], q,
                              max_memory=max_memory, aosym='s2')
        for pqkR, pqkI, p0, p1 in blocks:
            sqrt_coulG = numpy.sqrt(coulG[p0:p1])
            pqkR *= sqrt_coulG
            pqkI *= sqrt_coulG
            lib.ddot(pqkR, pqkR.T, 1, eriR, 1)
            lib.ddot(pqkI, pqkI.T, 1, eriR, 1)
            pqkR = pqkI = None
        if compact:
            return eriR
        return ao2mo.restore(1, eriR, nao).reshape(nao2, -1)

    eriR = numpy.zeros((nao2, nao2))
    eriI = numpy.zeros((nao2, nao2))

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    # complex integrals, N^4 elements
    if is_zero(kpti - kptl) and is_zero(kptj - kptk):
        blocks = mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory)
        for pqkR, pqkI, p0, p1 in blocks:
            sqrt_coulG = numpy.sqrt(coulG[p0:p1])
            pqkR *= sqrt_coulG
            pqkI *= sqrt_coulG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri_complex = (eriR + eriI * 1j).reshape(-1, nao, nao)
        eri = lib.transpose(eri_complex, axes=(0, 2, 1))
        return eri.reshape(nao2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    # rho_rs(-G-k) = rho_rs(conj(G+k)) = conj(rho_sr(G+k))
    paired_blocks = lib.izip(
        mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory*.5),
        mydf.pw_loop(mydf.gs, -kptijkl[2:], q, max_memory=max_memory*.5))
    for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in paired_blocks:
        pqkR *= coulG[p0:p1]
        pqkI *= coulG[p0:p1]
        # rho_pq(G+k_pq) * conj(rho_sr(G+k_pq))
        zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
        pqkR = pqkI = rskR = rskI = None
    return (eriR + eriI * 1j)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the 3-index integrals for all k-point pairs and write them to
    ``cderi_file``.

    Args:
        mydf : density-fitting object (reads ``stdout``, ``verbose``,
            ``max_memory``, ``mesh``, ``linear_dep_threshold`` and calls
            ``weighted_coulG`` / ``auxbar``).
        cell : the AO cell.
        auxcell : the auxiliary-basis cell.
        kptij_lst : array indexed as ``[:, 0]`` (kpti) and ``[:, 1]`` (kptj).
        cderi_file : path of the HDF5 file (opened with mode 'w') that
            receives ``j3c-kptij``, ``j3c/<pair>/<step>`` and, for
            low-dimension systems, ``j3c-/<pair>/<step>``.

    The output file is closed even when a step raises.
    '''
    t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            # Only the rows from naux onwards are corrected; the matching
            # upper-right block is then refreshed from the lower-left one.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Return (j2c, j2c_negative, j2ctag): the Cholesky factor of the
        stored metric ('CD'), or the eigen-decomposition based factors
        ('eig') when the Cholesky decomposition fails.'''
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    nsegs = len(fswap['j3c-junk/0'])

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Contract and store the 3-index integrals of every k-point pair
        whose difference kptj - kpti equals uniq_kpts[uniq_kptji_id].'''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        # Fourier transform restricted to the shells after auxcell.nbas.
        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            '''Read one column slice of the 3c2e intermediates from fswap
            and return its real / imaginary parts per k-point pair.'''
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        # Column boundaries of consecutive shell ranges.
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep + 1, len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao.ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                           b, gxyz[p0:p1], Gvbase, kpt,
                                           adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        '''Complex-conjugate the metric factors, keeping the tag.'''
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2 * numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # The context manager guarantees the output file is closed even when
    # one of the steps below raises.
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Accumulate MO-basis two-electron integrals for a set of four k-points.

    The result is summed chunk by chunk over the blocks yielded by
    ``mydf.pw_loop``; every chunk is weighted with
    ``mydf.weighted_coulG(q, False, mesh)`` where ``q = kptj - kpti``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        mo_coeffs : either one 2D ndarray (then used for all four indices)
            or a sequence of four coefficient arrays.
        kpts : k-points; normalised by ``_format_kpts`` into
            (kpti, kptj, kptk, kptl).
        compact : forwarded to ``_conc_mos`` in the real gamma-point branch
            only; the complex branches do not use it.

    Returns:
        A 2D array with one row per ij pair and one column per kl pair.
        It is a real array in the gamma-point branch and is accumulated in a
        complex128 array otherwise.  When ``_iskconserv`` reports that the
        k-points do not conserve momentum, a warning is logged and an
        all-zero array with one axis per MO set (length ``mo.shape[1]``)
        is returned instead.
    '''
    warn_pbc2d_eri(mydf)
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptijkl[:2], q,
                                               max_memory=max_memory,
                                               aosym='s2'):
            # Scale the AO-pair block in place by sqrt(coulG): both the ij and
            # the kl factors are derived from the same buffer, so their
            # product carries the full coulG weight.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI, 1, eri_mo, 1)
            # Drop the references to the chunk before the next one is fetched.
            pqkR = pqkI = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti - kptl) and is_zero(kptj - kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex was only an alias of the builtin complex and has been
        # removed from NumPy; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptijkl[:2], q,
                                               max_memory=max_memory):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            buf *= numpy.sqrt(coulG[p0:p1]).reshape(-1, 1)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            pqkR = pqkI = None
        # The second index was built as an (l, k) pair; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        # Two pw_loop generators are advanced in lock-step, so each one only
        # gets half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR - rskI * 1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1].reshape(-1, 1)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Write the fitted 3-index blocks for one unique k-point difference.

    Every k-point pair whose entry in ``uniq_inverse`` equals
    ``uniq_kptji_id`` is processed: the raw blocks are read from
    ``fswap['j3c-junk/...']``, a plane-wave contribution is subtracted, the
    decomposed metric in ``cholesky_j2c`` is applied, and the result is
    stored in ``feri['j3c/<ji>/<istep>']`` (plus ``feri['j3c-/...']`` when a
    negative metric part is supplied).  The ``j3c-junk`` groups of the
    processed pairs are deleted from ``fswap`` at the end.

    NOTE(review): ``uniq_kpts``, ``uniq_inverse``, ``kptjs``, ``fswap``,
    ``feri``, ``fuse``, ``fused_cell``, ``Gv``, ``gxyz``, ``Gvbase``, ``b``,
    ``mesh``, ``nao``, ``naux``, ``ngrids``, ``nsegs``, ``log``, ``cell`` and
    ``mydf`` are not defined in this block; presumably they come from an
    enclosing scope -- confirm against the caller.

    Args:
        uniq_kptji_id : index into ``uniq_kpts``.
        cholesky_j2c : tuple ``(j2c, j2c_negative, j2ctag)``; ``j2ctag`` of
            'CD' selects a triangular solve, anything else a matrix product.
    '''
    # kpt = kptj - kpti
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # Fourier transform of the auxiliary functions, weighted by coulG and
    # split into contiguous real and imaginary parts for the real-valued dots
    # performed in pw_contract.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, mesh)
    kLR = Gaux.T.real.copy('C')
    kLI = Gaux.T.imag.copy('C')

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        # vbar and ovlp exist only for 3D cells at kpt == 0; they are read
        # further down under exactly the same condition.
        if cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory-mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Use the widest column count among the shell ranges as the buffer width.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    # Scratch buffers reused for every grid block inside pw_contract.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128)

    def pw_contract(istep, sh_range, j3cR, j3cI):
        # Subtract the plane-wave contribution from the blocks in j3cR/j3cI
        # (modified in place), then apply the metric and store the result in
        # feri under the step index istep.
        bstart, bend, ncol = sh_range
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                # Views on the preallocated scratch buffers.
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                # j3cI[k] is None for the real (kpt == 0, gamma point) case,
                # so the imaginary accumulation is skipped there.
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d'%(ji,istep)] = v
            else:
                feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

    # pw_contract is invoked through lib.call_in_background; the loop below
    # prepares the input blocks of one shell range and hands them over.
    with lib.call_in_background(pw_contract) as compute:
        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]
                v = fuse(numpy.vstack(v))
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
            compute(istep, sh_range, j3cR, j3cI)

    # The temporary raw integrals of the processed pairs are no longer needed.
    for ji in adapted_ji_idx:
        del(fswap['j3c-junk/%d'%ji])
def sr_loop(self, kpti_kptj=numpy.zeros((2,3)), max_memory=2000,
            compact=True, blksize=None):
    '''Short range part

    Generator over row blocks of the 3-index tensor read from
    ``self._cderi`` (``self.build()`` is called first when it is None).

    Each item is a tuple ``(LpqR, LpqI, sign)`` holding the real part, the
    imaginary part and +1 for blocks of the 'j3c' data.  For 2D cells whose
    ``low_dim_ft_type`` is not 'inf_vacuum', blocks of the 'j3c-' data are
    yielded afterwards with sign -1.

    Args:
        kpti_kptj : pair of k-points (kpti, kptj).
        max_memory : budget in MB, used only to derive ``blksize`` when
            ``blksize`` is None.
        compact : when False and kpti == kptj, the packed lower-triangular
            blocks are unpacked to ``nao**2`` columns.
        blksize : number of rows loaded per block.
    '''
    if self._cderi is None:
        self.build()
    cell = self.cell
    kpti, kptj = kpti_kptj
    unpack = is_zero(kpti-kptj) and not compact
    is_real = is_zero(kpti_kptj)
    nao = cell.nao_nr()
    nao2 = nao**2
    ntril = nao*(nao+1)//2

    if blksize is None:
        # Bytes per element and number of elements held per row.
        if not is_real:
            itemsize, ncol = 16, nao2*2
        elif unpack:
            itemsize, ncol = 8, ntril+nao2
        else:
            itemsize, ncol = 8, nao*(nao+1)
        blksize = max_memory*1e6/itemsize/ncol
        blksize = max(16, min(int(blksize), self.blockdim))
        logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory, blksize)

    if unpack:
        # Scratch space for the packed real/imaginary parts of a complex block.
        buf = numpy.empty((blksize,ntril))

    def load(Lpq, b0, b1, bufR, bufI):
        block = numpy.asarray(Lpq[b0:b1])
        if is_real:
            if unpack:
                realpart = lib.unpack_tril(block, out=bufR).reshape(-1,nao2)
            else:
                realpart = block
            return realpart, numpy.zeros_like(realpart)

        shape = block.shape
        if unpack:
            tmp = numpy.ndarray(shape, buffer=buf)
            tmp[:] = block.real
            realpart = lib.unpack_tril(tmp, out=bufR).reshape(-1,nao2)
            tmp[:] = block.imag
            imagpart = lib.unpack_tril(tmp, lib.ANTIHERMI, out=bufI).reshape(-1,nao2)
        else:
            realpart = numpy.ndarray(shape, buffer=bufR)
            realpart[:] = block.real
            imagpart = numpy.ndarray(shape, buffer=bufI)
            imagpart[:] = block.imag
        return realpart, imagpart

    def iter_blocks(j3c, sign):
        # The arrays returned by one load() are passed back as output
        # buffers for the next one.
        partR = partI = None
        naux = j3c.shape[0]
        for b0, b1 in lib.prange(0, naux, blksize):
            partR, partI = load(j3c, b0, b1, partR, partI)
            yield partR, partI, sign

    with _load3c(self._cderi, 'j3c', kpti_kptj, 'j3c-kptij') as j3c:
        for item in iter_blocks(j3c, 1):
            yield item

    if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
        # Truncated Coulomb operator is not positive definite. Load the
        # CDERI tensor of negative part.
        with _load3c(self._cderi, 'j3c-', kpti_kptj, 'j3c-kptij',
                     ignore_key_error=True) as j3c:
            for item in iter_blocks(j3c, -1):
                yield item
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the fitted 3-index tensors and write them to ``cderi_file``.

    Steps, as performed below:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals under 'j3c-junk' in
       a temporary HDF5 swap file created next to ``cderi_file``.
    2. For every unique ``kptj - kpti`` the 2-index metric ('int2c2e') is
       corrected by subtracting a plane-wave contribution and stored in the
       swap file as 'j2c/<k>'.
    3. Each metric is Cholesky-decomposed (with an eigen-decomposition
       fallback) and ``make_kpt`` writes the blocks 'j3c/<ji>/<istep>' (and
       'j3c-/<ji>/<istep>' for the negative metric part) to ``cderi_file``.

    Args:
        mydf : density-fitting object (``mesh``, ``max_memory``,
            ``weighted_coulG``, ``auxbar``, ``linear_dep_threshold``,
            ``stdout`` and ``verbose`` are read).
        cell : the cell holding the AO basis.
        auxcell : the cell holding the auxiliary basis.
        kptij_lst : array of k-point pairs; column 0 is kpti, column 1 kptj.
        cderi_file : path of the HDF5 file to create (opened with mode 'w').

    Returns:
        None.  The output file is closed on exit, also when an error occurs.
    '''
    # time.clock() was removed in Python 3.8; use time.process_time() and fall
    # back to time.clock() only on interpreters that do not provide it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b,
                                gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            # Only the rows from naux on are corrected; the matching column
            # block is then refreshed from them by (conjugate) transposition.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        # Returns (j2c, j2c_negative, j2ctag): the lower Cholesky factor with
        # tag 'CD', or an eigenvector-based factor with tag 'eig'.
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # A metric that is not positive definite is not treated as an
            # error: eigenvalues at or below linear_dep_threshold are dropped.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    # NOTE: make_kpt reads ``feri`` and ``nsegs`` from this scope; both are
    # assigned further down, before make_kpt is called for the first time.
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    # Only the rows from naux on receive the correction.
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # j3cI[k] is None in the purely real case.
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    feri = h5py.File(cderi_file, 'w')
    try:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
            # symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
            # symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
    finally:
        # Close the output file even when one of the steps above fails.
        feri.close()
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Accumulate MO-basis two-electron integrals for a set of four k-points.

    The result is summed chunk by chunk over the blocks yielded by
    ``mydf.pw_loop``; every chunk is weighted with
    ``mydf.weighted_coulG(q, False, mesh)`` where ``q = kptj - kpti``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        mo_coeffs : either one 2D ndarray (then used for all four indices)
            or a sequence of four coefficient arrays.
        kpts : k-points; normalised by ``_format_kpts`` into
            (kpti, kptj, kptk, kptl).
        compact : forwarded to ``_conc_mos`` in the real gamma-point branch
            only; the complex branches do not use it.

    Returns:
        A 2D array with one row per ij pair and one column per kl pair.
        It is a real array in the gamma-point branch and is accumulated in a
        complex128 array otherwise.  When ``_iskconserv`` reports that the
        k-points do not conserve momentum, a warning is logged and an
        all-zero array with one axis per MO set (length ``mo.shape[1]``)
        is returned instead.
    '''
    warn_pbc2d_eri(mydf)
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptijkl[:2], q,
                                               max_memory=max_memory,
                                               aosym='s2'):
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            # The coulG weight is applied to the kl factor of each product.
            lib.ddot(ijR.T, klR*coulG[p0:p1,None], 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI*coulG[p0:p1,None], 1, eri_mo, 1)
            # Drop the references to the chunk before the next one is fetched.
            pqkR = pqkI = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex was only an alias of the builtin complex and has been
        # removed from NumPy; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptijkl[:2], q,
                                               max_memory=max_memory):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj()*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        # The second index was built as an (l, k) pair; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        # Two pw_loop generators are advanced in lock-step, so each one only
        # gets half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR-rskI*1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1,None]
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    '''Accumulate the AO-basis two-electron integrals for four k-points.

    The integrals are summed over the chunks yielded by ``mydf.pw_loop``,
    weighted with ``mydf.weighted_coulG(q, False, mesh)`` where
    ``q = kptj - kpti``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        kpts : k-points; normalised by ``_format_kpts`` into
            (kpti, kptj, kptk, kptl).
        compact : only used at the gamma point.  When false, the packed
            result is expanded with ``ao2mo.restore(1, ...)``.

    Returns:
        Gamma point: a real (nao_pair, nao_pair) array, or its restored form
        reshaped to ``nao**2`` rows when ``compact`` is false.  Otherwise a
        complex array built from (nao**2, nao**2) real and imaginary parts.
        If the k-points do not conserve momentum, a warning is logged and a
        zero array of shape (nao, nao, nao, nao) is returned.
    '''
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao, nao, nao, nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    mem_avail = mydf.max_memory - lib.current_memory()[0]
    max_memory = max(2000, mem_avail * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        npair = nao * (nao + 1) // 2
        eri_s4 = numpy.zeros((npair, npair))
        chunks = mydf.pw_loop(mesh, kptijkl[:2], q,
                              max_memory=max_memory, aosym='s2')
        for pqkR, pqkI, p0, p1 in chunks:
            # sqrt(coulG) on each factor gives coulG on the product
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.ddot(pqkR, pqkR.T, 1, eri_s4, 1)
            lib.ddot(pqkI, pqkI.T, 1, eri_s4, 1)
            pqkR = pqkI = None
        if compact:
            return eri_s4
        return ao2mo.restore(1, eri_s4, nao).reshape(nao**2, -1)

    nao2 = nao**2
    accR = numpy.zeros((nao2, nao2))
    accI = numpy.zeros((nao2, nao2))

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    # complex integrals, N^4 elements
    if is_zero(kpti - kptl) and is_zero(kptj - kptk):
        chunks = mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory)
        for pqkR, pqkI, p0, p1 in chunks:
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, accR, accI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri = lib.transpose((accR + accI * 1j).reshape(-1, nao, nao),
                            axes=(0, 2, 1))
        return eri.reshape(nao2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    #
    #       (pq|rs) = \sum_G 4\pi rho_pq rho_rs / |G+k_{pq}|^2
    #       rho_pq = 1/N \sum_{Tp,Tq} \int exp(-i(G+k_{pq})*r) p(r-Tp) q(r-Tq) dr
    #              = \sum_{Tq} exp(i k_q*Tq) \int exp(-i(G+k_{pq})*r) p(r) q(r-Tq) dr
    # Note the k-point wrap-around for rho_rs, which leads to G+k_{pq} in FT
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = \sum_{Ts} exp(i k_s*Ts) \int exp( i(G+k_{pq})*r) r(r) s(r-Ts) dr
    # rho_pq can be directly evaluated by AFT (function pw_loop)
    #       rho_pq = pw_loop(k_q, G+k_{pq})
    # Assuming r(r) and s(r) are real functions, rho_rs is evaluated
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = conj(\sum_{Ts} exp(-i k_s*Ts) \int exp(-i(G+k_{pq})*r) r(r) s(r-Ts) dr)
    #              = conj( pw_loop(-k_s, G+k_{pq}) )
    #
    # TODO: For complex AO function r(r) and s(r), pw_loop function needs to be
    # extended to include Gv vector in the arguments
    half_memory = max_memory*.5
    pq_chunks = mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=half_memory)
    rs_chunks = mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=half_memory)
    for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in lib.izip(pq_chunks, rs_chunks):
        pqkR *= coulG[p0:p1]
        pqkI *= coulG[p0:p1]
        zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, accR, accI, 1)
        pqkR = pqkI = rskR = rskI = None
    return (accR + accI * 1j)
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    '''Accumulate the AO-basis two-electron integrals for four k-points.

    The integrals are summed over the chunks yielded by ``mydf.pw_loop``,
    weighted with ``mydf.weighted_coulG(q, False, mesh)`` where
    ``q = kptj - kpti``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        kpts : k-points; normalised by ``_format_kpts`` into
            (kpti, kptj, kptk, kptl).
        compact : only used at the gamma point.  When false, the packed
            result is expanded with ``ao2mo.restore(1, ...)``.

    Returns:
        Gamma point: a real (nao_pair, nao_pair) array, or its restored form
        reshaped to ``nao**2`` rows when ``compact`` is false.  Otherwise a
        complex array built from (nao**2, nao**2) real and imaginary parts.
        If the k-points do not conserve momentum, a warning is logged and a
        zero array of shape (nao, nao, nao, nao) is returned.
    '''
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    mem_avail = mydf.max_memory - lib.current_memory()[0]
    max_memory = max(2000, mem_avail * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        npair = nao * (nao+1) // 2
        eri_s4 = numpy.zeros((npair,npair))
        chunks = mydf.pw_loop(mesh, kptijkl[:2], q,
                              max_memory=max_memory, aosym='s2')
        for pqkR, pqkI, p0, p1 in chunks:
            lib.ddot(pqkR*coulG[p0:p1], pqkR.T, 1, eri_s4, 1)
            lib.ddot(pqkI*coulG[p0:p1], pqkI.T, 1, eri_s4, 1)
            pqkR = pqkI = None
        if compact:
            return eri_s4
        return ao2mo.restore(1, eri_s4, nao).reshape(nao**2,-1)

    nao2 = nao**2
    accR = numpy.zeros((nao2,nao2))
    accI = numpy.zeros((nao2,nao2))

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    # complex integrals, N^4 elements
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        chunks = mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory)
        for pqkR, pqkI, p0, p1 in chunks:
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR*coulG[p0:p1], pqkI*coulG[p0:p1], pqkR.T, pqkI.T,
                   1, accR, accI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri = lib.transpose((accR+accI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    #
    #       (pq|rs) = \sum_G 4\pi rho_pq rho_rs / |G+k_{pq}|^2
    #       rho_pq = 1/N \sum_{Tp,Tq} \int exp(-i(G+k_{pq})*r) p(r-Tp) q(r-Tq) dr
    #              = \sum_{Tq} exp(i k_q*Tq) \int exp(-i(G+k_{pq})*r) p(r) q(r-Tq) dr
    # Note the k-point wrap-around for rho_rs, which leads to G+k_{pq} in FT
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = \sum_{Ts} exp(i k_s*Ts) \int exp( i(G+k_{pq})*r) r(r) s(r-Ts) dr
    # rho_pq can be directly evaluated by AFT (function pw_loop)
    #       rho_pq = pw_loop(k_q, G+k_{pq})
    # Assuming r(r) and s(r) are real functions, rho_rs is evaluated
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = conj(\sum_{Ts} exp(-i k_s*Ts) \int exp(-i(G+k_{pq})*r) r(r) s(r-Ts) dr)
    #              = conj( pw_loop(-k_s, G+k_{pq}) )
    #
    # TODO: For complex AO function r(r) and s(r), pw_loop function needs to be
    # extended to include Gv vector in the arguments
    half_memory = max_memory*.5
    pq_chunks = mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=half_memory)
    rs_chunks = mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=half_memory)
    for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in lib.izip(pq_chunks, rs_chunks):
        pqkR *= coulG[p0:p1]
        pqkI *= coulG[p0:p1]
        zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, accR, accI, 1)
        pqkR = pqkI = rskR = rskI = None
    return (accR+accI*1j)
def get_ao_pairs_G(mydf, kpts=numpy.zeros((2,3)), q=None, shls_slice=None,
                   compact=getattr(__config__, 'pbc_df_ao_pairs_compact', False)):
    '''Calculate forward (G|ij) FFT of all AO pairs.

    Args:
        mydf : object providing ``cell``, ``mesh`` and ``_numint``.
        kpts : the two k-points of the AO pair; None means two zero vectors.
        q : momentum used for the phase factor when the two k-points
            differ; defaults to ``kpts[1] - kpts[0]``.
        shls_slice : optional (ish0, ish1, jsh0, jsh1) shell ranges that are
            converted to AO ranges with ``cell.ao_loc_nr()``.
        compact : when true and both k-points are the gamma point, only the
            pairs with j <= i are computed.

    Returns:
        ao_pairs_G : 2D complex array
            For gamma point, the shape is (ngrids, nao*(nao+1)/2); otherwise the
            shape is (ngrids, nao*nao)
    '''
    if kpts is None:
        kpts = numpy.zeros((2,3))
    cell = mydf.cell
    kpts = numpy.asarray(kpts)
    coords = cell.gen_uniform_grids(mydf.mesh)
    ngrids = len(coords)

    if shls_slice is None:
        i0, i1 = j0, j1 = (0, cell.nao_nr())
    else:
        ish0, ish1, jsh0, jsh1 = shls_slice
        ao_loc = cell.ao_loc_nr()
        i0, i1 = ao_loc[ish0], ao_loc[ish1]
        j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

    def trans(aoi, aoj, fac=1):
        # FFT of fac * conj(aoi) * aoj for every (i, j) pair.
        if aoi is aoj:
            rows = cols = numpy.asarray(aoi.T, order='C')
        else:
            rows = numpy.asarray(aoi.T, order='C')
            cols = numpy.asarray(aoj.T, order='C')
        ni = rows.shape[0]
        nj = cols.shape[0]
        pairs = numpy.empty((ni,nj,ngrids), dtype=numpy.complex128)
        for i in range(ni):
            pairs[i] = tools.fft(fac * rows[i].conj() * cols, mydf.mesh)
        return pairs.reshape(-1,ngrids).T

    if compact and gamma_point(kpts):  # gamma point
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        ao = numpy.asarray(ao.T, order='C')
        npair = i1*(i1+1)//2 - i0*(i0+1)//2
        packed = numpy.empty((npair,ngrids), dtype=numpy.complex128)
        offset = 0
        for i in range(i0, i1):
            # Row i contributes the i+1 pairs (i, 0..i).
            packed[offset:offset+i+1] = tools.fft(ao[i] * ao[:i+1], mydf.mesh)
            offset += i + 1
        return packed.T

    if is_zero(kpts[0]-kpts[1]):
        ao = mydf._numint.eval_ao(cell, coords, kpts[:1])[0]
        return trans(ao[:,i0:i1], ao[:,j0:j1])

    if q is None:
        q = kpts[1] - kpts[0]
    aoi, aoj = mydf._numint.eval_ao(cell, coords, kpts[:2])
    fac = numpy.exp(-1j * numpy.dot(coords, q))
    return trans(aoi[:,i0:i1], aoj[:,j0:j1], fac)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index integrals and store them on disk.

    The analytical 3-centre integrals are first dumped into a temporary HDF5
    swap file (group 'j3c-junk'), the 2-centre metric 'j2c/<k>' is stored in
    the same swap file, and the corrected, metric-decomposed tensors are
    finally written to ``cderi_file``.

    Args:
        mydf : DF object.  The attributes read here are stdout, verbose,
            max_memory, mesh, linear_dep_threshold and the methods
            weighted_coulG and auxbar.
        cell : the unit cell of the orbital basis.
        auxcell : the cell holding the auxiliary basis.
        kptij_lst : array of k-point pairs; kptij_lst[:,0] are k_i and
            kptij_lst[:,1] are k_j.
        cderi_file : path of the output HDF5 file.  It is opened with mode
            'w', i.e. an existing file is truncated.

    Returns:
        None.  ``cderi_file`` holds 'j3c-kptij' (a copy of kptij_lst) and one
        dataset 'j3c/<pair index>/<segment index>' per k-point pair and AO
        shell-range segment.
    '''
    # time.clock was removed in Python 3.8.  Keep using it where it exists
    # (identical behaviour to the original) and fall back to process_time.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]
    low_dim = cell.dimension == 1 or cell.dimension == 2

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

# An alternative method to evaluate j2c. This method might have larger numerical error?
#    chgcell = make_modchg_basis(auxcell, mydf.eta)
#    for k, kpt in enumerate(uniq_kpts):
#        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
#        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
#        LkR = aoaux.real * coulG
#        LkI = aoaux.imag * coulG
#        j2caux = numpy.zeros_like(j2c[k])
#        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
#        if is_zero(kpt):  # kpti == kptj
#            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
#            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#            vbar = fuse(mydf.auxbar(fused_cell))
#            s = (vbar != 0).astype(numpy.double)
#            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
#            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
#        else:
#            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
#            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
#        aoaux = LkR = LkI = coulG = None

    if low_dim:
        plain_ints = _gaussian_int(fused_cell)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt)
            if low_dim and is_zero(kpt):
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(
                    cell, Gv[p0:p1])
                aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints)
            aoaux = aoaux.T
            LkR = aoaux.real * coulG[p0:p1]
            LkI = aoaux.imag * coulG[p0:p1]
            aoaux = None

            # Only the rows belonging to the compensating functions
            # (index >= naux) are corrected; the upper-right block is then
            # restored from the lower-left one.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    # NOTE: make_kpt reads `feri` and `nsegs` from this scope; both are bound
    # further down, before make_kpt is called for the first time.
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        kpt_is_zero = is_zero(kpt)
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if low_dim and kpt_is_zero:
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite (the mesh is likely not
            # enough).  Instead of raising, drop the eigenvectors below
            # linear_dep_threshold and use the eigen-decomposition.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            keep = w > mydf.linear_dep_threshold
            v = v[:, keep].T.conj()
            v /= numpy.sqrt(w[keep]).reshape(-1, 1)
            j2c = v
            j2ctag = 'eig'

        if kpt_is_zero:  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if low_dim and kpt_is_zero:
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(
                        cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        # aux and vG_mod do not depend on the k-point pair:
                        # evaluate them once per G block.
                        aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                        vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                              wcoulG[p0:p1][G0idx], SI_on_z)
                        aux = None
                        # NOTE(review): ovlp[k] is the full packed overlap
                        # while aoao/j3cR[k] hold the ncol columns of this
                        # shell range; this looks like it only broadcasts
                        # when one range covers all pairs -- confirm.
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z,
                                                        ovlp[k])
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:, None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:, None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                                tmp = None
                        vG_mod = None

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # The imaginary part is not stored (j3cI[k] is None)
                    # when both kpt and k_j are the gamma point.
                    if not (kpt_is_zero and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d' % (ji, istep)] = v

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1 + ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([
                        fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                        for i in range(nsegs)
                    ])
                    if kpt_is_zero and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        for ji in adapted_ji_idx:
            del fswap['j3c-junk/%d' % ji]

    feri = h5py.File(cderi_file, 'w')
    try:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
    finally:
        # Close the output file even if one of the k-points fails.
        feri.close()
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Generate and store the fitted 3-index tensors of one unique kpt.

    All k-point pairs (k_i, k_j) with k_j - k_i equal to
    ``uniq_kpts[uniq_kptji_id]`` are handled together.  For each AO shell
    range the intermediates are read from ``fswap['j3c-junk/...']``,
    corrected with the plane-wave part, contracted with the decomposed metric
    ``fswap['j2c/<uniq_kptji_id>']`` and written to
    ``feri['j3c/<pair index>/<segment index>']``.  The consumed 'j3c-junk'
    groups are deleted from ``fswap`` at the end.

    This function is a closure: uniq_kpts, uniq_inverse, kptjs, log, cell,
    auxcell, fused_cell, fuse, mydf, Gv, Gvbase, gxyz, b, mesh, ngrids, nao,
    naux, nsegs, plain_ints, fswap and feri are taken from the enclosing
    scope.

    Args:
        uniq_kptji_id : index into ``uniq_kpts``.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    kpt_is_zero = is_zero(kpt)
    low_dim = cell.dimension == 1 or cell.dimension == 2
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    if low_dim and kpt_is_zero:
        G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
        s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
        Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None
    j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
    try:
        j2c = scipy.linalg.cholesky(j2c, lower=True)
        j2ctag = 'CD'
    except scipy.linalg.LinAlgError:
        # The metric is not positive definite (the mesh is likely not
        # enough).  Instead of raising, drop the eigenvectors below
        # linear_dep_threshold and use the eigen-decomposition.
        w, v = scipy.linalg.eigh(j2c)
        log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
        log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                  numpy.count_nonzero(w < mydf.linear_dep_threshold))
        keep = w > mydf.linear_dep_threshold
        v = v[:, keep].T.conj()
        v /= numpy.sqrt(w[keep]).reshape(-1, 1)
        j2c = v
        j2ctag = 'eig'

    if kpt_is_zero:  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = mydf.auxbar(fused_cell)
        ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
        ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                 nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

    def pw_contract(istep, sh_range, j3cR, j3cI):
        bstart, bend, ncol = sh_range
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)

            if low_dim and kpt_is_zero:
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(
                    cell, Gv[p0:p1])
                if SI_on_z.size > 0:
                    # aux and vG_mod do not depend on the k-point pair:
                    # evaluate them once per G block.
                    aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                    vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                          wcoulG[p0:p1][G0idx], SI_on_z)
                    aux = None
                    # NOTE(review): ovlp[k] is the full packed overlap while
                    # aoao/j3cR[k] hold the ncol columns of this shell range;
                    # this looks like it only broadcasts when one range
                    # covers all pairs -- confirm.
                    for k, aoao in enumerate(dat):
                        aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                        if gamma_point(adapted_kptjs[k]):
                            j3cR[k][:naux] -= vG_mod[:, None].real * ovlp[k]
                        else:
                            tmp = vG_mod[:, None] * ovlp[k]
                            j3cR[k][:naux] -= tmp.real
                            j3cI[k][:naux] -= tmp.imag
                            tmp = None
                    vG_mod = None

            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                # The imaginary part is not stored (j3cI[k] is None) when
                # both kpt and k_j are the gamma point.
                if not (kpt_is_zero and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d/%d' % (ji, istep)] = v

    with lib.call_in_background(pw_contract) as compute:
        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                if kpt_is_zero and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
            compute(istep, sh_range, j3cR, j3cI)

    for ji in adapted_ji_idx:
        del fswap['j3c-junk/%d' % ji]
def pw_contract(istep, sh_range, j3cR, j3cI):
    '''Add the plane-wave contribution to one AO segment and store it.

    For every block of G vectors the Fourier-transformed AO pairs are
    contracted with kLR/kLI and accumulated (with a minus sign) into the rows
    ``[naux:]`` of j3cR/j3cI.  The result is then passed through ``fuse``,
    contracted with the decomposed metric ``j2c`` and written to
    ``feri['j3c/<pair index>/<istep>']``.

    This function is a closure: aosym, cell, fused_cell, fuse, Gv, Gvbase,
    gxyz, b, kpt, adapted_kptjs, adapted_ji_idx, ngrids, Gblksize, naux,
    ovlp, wcoulG, kLR, kLI, buf, pqkRbuf, pqkIbuf, j2c, j2ctag and feri are
    taken from the enclosing scope.

    Args:
        istep : index of the AO segment, used in the output dataset name.
        sh_range : (bstart, bend, ncol) shell range and its number of
            AO-pair columns.
        j3cR : list of real parts, one array per adapted k-point pair;
            updated in place.
        j3cI : list of imaginary parts, updated in place.  An entry is not
            touched when both kpt and the corresponding k_j are the gamma
            point.
    '''
    bstart, bend, ncol = sh_range
    if aosym == 's2':
        shls_slice = (bstart, bend, 0, bend)
    else:
        shls_slice = (bstart, bend, 0, cell.nbas)

    kpt_is_zero = is_zero(kpt)
    low_dim = cell.dimension == 1 or cell.dimension == 2

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    adapted_kptjs, out=buf)

        if low_dim and kpt_is_zero:
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(
                cell, Gv[p0:p1])
            if SI_on_z.size > 0:
                # aux and vG_mod do not depend on the k-point pair: evaluate
                # them once per G block instead of once per pair.
                aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                      wcoulG[p0:p1][G0idx], SI_on_z)
                aux = None
                # NOTE(review): presumably ovlp[k] must have exactly ncol
                # entries for the broadcasts below to work -- confirm against
                # the caller when several shell ranges are used.
                for k, aoao in enumerate(dat):
                    aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                    if gamma_point(adapted_kptjs[k]):
                        j3cR[k][:naux] -= vG_mod[:, None].real * ovlp[k]
                    else:
                        tmp = vG_mod[:, None] * ovlp[k]
                        j3cR[k][:naux] -= tmp.real
                        j3cI[k][:naux] -= tmp.imag
                        tmp = None
                vG_mod = None

        nG = p1 - p0
        for k, ji in enumerate(adapted_ji_idx):
            aoao = dat[k].reshape(nG, ncol)
            # Reuse the preallocated buffers for the transposed real and
            # imaginary parts.
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
            if not (kpt_is_zero and gamma_point(adapted_kptjs[k])):
                lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

    for k, ji in enumerate(adapted_ji_idx):
        if kpt_is_zero and gamma_point(adapted_kptjs[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                              overwrite_b=True)
        else:
            v = lib.dot(j2c, v)
        feri['j3c/%d/%d' % (ji, istep)] = v
def _ft_aopair_kpts(cell, Gv, shls_slice=None, aosym='s1',
                    b=None, gxyz=None, Gvbase=None, q=numpy.zeros(3),
                    kptjs=numpy.zeros((1,3)), intor='GTO_ft_ovlp', comp=1,
                    out=None):
    r'''
    FT transform AO pair
    \sum_T exp(-i k_j * T) \int exp(-i(G+q)r) i(r) j(r-T) dr^3

    The return array holds the AO pair
    corresponding to the kpoints given by kptjs

    Args:
        cell : the unit cell.
        Gv : plane-wave vectors, reshaped to (nGv, 3).  The input array is
            not modified.
        shls_slice : (ish0, ish1, jsh0, jsh1) shell ranges; all shells by
            default.
        aosym : 's1', 's2' or 's1hermi'.  It selects the C fill function
            'PBC_ft_fill_nk1<aosym>' / 'PBC_ft_fill_k<aosym>'.
        b, gxyz, Gvbase : reciprocal vectors, integer grid indices and the
            1D bases of Gv.  The structured evaluators ('GTO_Gv_orth' /
            'GTO_Gv_nonorth') are used only if all three are given, q is
            zero and Gvbase is not the pyscf-1.2 style list of integers;
            otherwise 'GTO_Gv_general' is used.
        q : momentum shift added to Gv.
        kptjs : k-points k_j, reshaped to (nkpts, 3).
        intor : name of the integral function in libpbc.
        comp : number of components of the integral.
        out : optional buffer backing the output array.

    Returns:
        Complex array of shape (nkpts, comp, nGv, ni, nj), or
        (nkpts, comp, nGv, nij) for aosym='s2'.  The comp axis is removed
        when comp == 1.
    '''
    intor = cell._add_suffix(intor)

    q = numpy.reshape(q, 3)
    kptjs = numpy.asarray(kptjs, order='C').reshape(-1,3)
    Gv = numpy.asarray(Gv, order='C').reshape(-1,3)
    nGv = Gv.shape[0]
    # numpy.asarray(Gv.T, order='C') returns a view of the caller's array when
    # Gv has a single row (the transpose is then already C-contiguous), and
    # the in-place shift below would overwrite the caller's data.  Always
    # work on a private copy.
    GvT = numpy.array(Gv.T, order='C')
    GvT += q.reshape(-1,1)

    if (gxyz is None or b is None or Gvbase is None or (abs(q).sum() > 1e-9)
# backward compatibility for pyscf-1.2, in which the argument Gvbase is gs
        or (Gvbase is not None and isinstance(Gvbase[0], (int, numpy.integer)))):
        p_gxyzT = lib.c_null_ptr()
        p_mesh = (ctypes.c_int*3)(0,0,0)
        p_b = (ctypes.c_double*1)(0)
        eval_gz = 'GTO_Gv_general'
    else:
        if abs(b-numpy.diag(b.diagonal())).sum() < 1e-8:
            eval_gz = 'GTO_Gv_orth'
        else:
            eval_gz = 'GTO_Gv_nonorth'
        gxyzT = numpy.asarray(gxyz.T, order='C', dtype=numpy.int32)
        p_gxyzT = gxyzT.ctypes.data_as(ctypes.c_void_p)
        # Pack b, q and the 1D bases into one flat array for the C code.
        # tuple() lets Gvbase be given as a list as well.
        b = numpy.hstack((b.ravel(), q) + tuple(Gvbase))
        p_b = b.ctypes.data_as(ctypes.c_void_p)
        p_mesh = (ctypes.c_int*3)(*[len(x) for x in Gvbase])

    Ls = cell.get_lattice_Ls()
    expkL = numpy.exp(1j * numpy.dot(kptjs, Ls.T))

    # The environment is duplicated so that the j index can refer to the
    # second copy of the shells (offset by cell.nbas).
    atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env,
                                 cell._atm, cell._bas, cell._env)
    ao_loc = gto.moleintor.make_loc(bas, intor)
    if shls_slice is None:
        shls_slice = (0, cell.nbas, cell.nbas, cell.nbas*2)
    else:
        shls_slice = (shls_slice[0], shls_slice[1],
                      cell.nbas+shls_slice[2], cell.nbas+shls_slice[3])
    ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
    nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
    nkpts = len(kptjs)
    nimgs = len(Ls)
    shape = (nkpts, comp, ni, nj, nGv)

# Theoretically, hermitian symmetry can be also found for kpti == kptj:
#       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
# hermi operation needs reordering the axis-0.  It is inefficient.
    if aosym == 's1hermi':  # Symmetry for Gamma point
        assert is_zero(q) and is_zero(kptjs) and ni == nj
    elif aosym == 's2':
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        nij = i1*(i1+1)//2 - i0*(i0+1)//2
        shape = (nkpts, comp, nij, nGv)

    drv = libpbc.PBC_ft_latsum_drv
    cintor = getattr(libpbc, intor)
    eval_gz = getattr(libpbc, eval_gz)
    if nkpts == 1:
        fill = getattr(libpbc, 'PBC_ft_fill_nk1'+aosym)
    else:
        fill = getattr(libpbc, 'PBC_ft_fill_k'+aosym)
    out = numpy.ndarray(shape, dtype=numpy.complex128, buffer=out)

    drv(cintor, eval_gz, fill, out.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(nimgs),
        Ls.ctypes.data_as(ctypes.c_void_p),
        expkL.ctypes.data_as(ctypes.c_void_p),
        (ctypes.c_int*4)(*shls_slice),
        ao_loc.ctypes.data_as(ctypes.c_void_p),
        GvT.ctypes.data_as(ctypes.c_void_p),
        p_b, p_gxyzT, p_mesh, ctypes.c_int(nGv),
        atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.natm),
        bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.nbas),
        env.ctypes.data_as(ctypes.c_void_p))

    if aosym == 's1hermi':
        # Copy the lower triangle to the upper triangle.
        for i in range(1,ni):
            out[:,:,:i,i] = out[:,:,i,:i]
    # Move the G axis in front of the AO axes.
    out = numpy.rollaxis(out, -1, 2)
    if comp == 1:
        out = out[:,0]
    return out
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index integrals in ``cderi_file``.

    The analytical 3-centre integrals are first written to ``cderi_file``
    (datasets 'j3c/<pair index>') by outcore.aux_e2.  They are then corrected
    with the plane-wave part, contracted with the decomposed 2-centre metric
    and written back in place.  The metric is stored temporarily as
    'j2c/<k>' in the same file and deleted once it has been used.

    Args:
        mydf : DF object.  The attributes read here are stdout, verbose,
            max_memory, gs, linear_dep_threshold and the methods
            weighted_coulG and auxbar.
        cell : the unit cell of the orbital basis.
        auxcell : the cell holding the auxiliary basis.
        kptij_lst : array of k-point pairs; kptij_lst[:,0] are k_i and
            kptij_lst[:,1] are k_j.
        cderi_file : path of the HDF5 file to be generated.

    Returns:
        None.  On exit every 'j3c/<pair index>' dataset of ``cderi_file`` is
        truncated to the number of rows of the decomposed metric.
    '''
    # time.clock was removed in Python 3.8.  Keep using it where it exists
    # (identical behaviour to the original) and fall back to process_time.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, cderi_file, 'int3c2e_sph', aosym='s2',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)

# An alternative method to evaluate j2c. This method might have larger numerical error?
#    chgcell = make_modchg_basis(auxcell, mydf.eta)
#    for k, kpt in enumerate(uniq_kpts):
#        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
#        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
#        LkR = aoaux.real * coulG
#        LkI = aoaux.imag * coulG
#        j2caux = numpy.zeros_like(j2c[k])
#        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
#        if is_zero(kpt):  # kpti == kptj
#            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
#            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#            vbar = fuse(mydf.auxbar(fused_cell))
#            s = (vbar != 0).astype(numpy.double)
#            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
#            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
#        else:
#            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
#            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#        feri['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
#        aoaux = LkR = LkI = coulG = None

    # NOTE: make_kpt reads `feri` from this scope; it is bound further down,
    # before make_kpt is called for the first time.
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        kpt_is_zero = is_zero(kpt)
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(feri['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite (gs is likely not
            # enough).  Instead of raising, drop the eigenvectors below
            # linear_dep_threshold and use the eigen-decomposition.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0],
                      numpy.count_nonzero(w<mydf.linear_dep_threshold))
            keep = w > mydf.linear_dep_threshold
            v = v[:,keep].T.conj()
            v /= numpy.sqrt(w[keep]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'
        # Number of rows of the final tensor (may be < naux in 'eig' mode).
        naux0 = j2c.shape[0]

        if kpt_is_zero:  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if kpt_is_zero:
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            # For 's1' the j index must run over all shells so that the
            # transformed pairs provide the ncol columns reshaped below;
            # (bstart, bend, 0, bend) is only valid for the packed 's2' case.
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngs, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # The imaginary part is not stored (j3cI[k] is None)
                    # when both kpt and k_j are the gamma point.
                    if not (kpt_is_zero and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d'%ji][:naux0,col0:col1] = v

        del feri['j2c/%d'%uniq_kptji_id]
        # Replace every dataset by its first naux0 rows.
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d'%ji][:naux0]
            del feri['j3c/%d'%ji]
            feri['j3c/%d'%ji] = v

    # The file already holds the 'j3c' datasets and is modified in place, so
    # it must be opened read/write.  h5py >= 3 opens read-only by default.
    feri = h5py.File(cderi_file, 'a')
    try:
        for k, kpt in enumerate(uniq_kpts):
            aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
            coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
            LkR = aoaux.real * coulG
            LkI = aoaux.imag * coulG

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            feri['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
            aoaux = LkR = LkI = coulG = None
        j2c = None

        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
    finally:
        # Close the file even if one of the k-points fails.
        feri.close()