def _ewald_exxdiv_1d2d(cell, kpts, dms, vk, kpts_band=None):
    '''Restore the (Gx=0,Gy=0) part of the exchange matrix for 1D/2D systems.

    The correction is accumulated into ``vk`` in place; nothing is returned.

    Args:
        cell: cell object; its ``pbc_intor``, ``get_Gv_weights`` and ``mesh``
            are used here.
        kpts: ``None``, a single k-point of shape (3,), or several k-points.
        dms: density matrices, indexed ``dms[i]`` in the single k-point case
            and ``dms[:, k]`` in the multi k-point case.
        vk: exchange matrices laid out like ``dms``; updated in place.
        kpts_band: optional k-points on which ``vk`` is defined.  For a
            single k-point nothing is added unless ``kpts_band`` is ``None``
            or equal to ``kpts``.
    '''
    s = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts)
    madelung = tools.pbc.madelung(cell, kpts)

    Gv, Gvbase, kws = cell.get_Gv_weights(cell.mesh)
    G0idx, SI_on_z = gto.cell._SI_for_uniform_model_charge(cell, Gv)
    Gv_sel = Gv[G0idx]
    coulG = 4 * numpy.pi / numpy.linalg.norm(Gv_sel, axis=1)**2
    wcoulG = coulG * kws[G0idx]
    aoao_ij = ft_ao._ft_aopair_kpts(cell, Gv_sel, kptjs=kpts)
    aoao_kl = ft_ao._ft_aopair_kpts(cell, -Gv_sel, kptjs=kpts)

    def _contract_(vk, dms, s, aoao_ij, aoao_kl, kweight):
        # Without removing aoao(Gx=0,Gy=0), the summation of vk and ewald probe
        # charge correction (as _ewald_exxdiv_3d did) gives the reasonable
        # finite value for vk.  Here madelung constant and vk were calculated
        # without (Gx=0,Gy=0).  The code below restores the (Gx=0,Gy=0) part.
        #
        # Reference (unexpanded) form of the quantity evaluated below:
        #:aoaomod_ij = aoao_ij - numpy.einsum('g,ij->gij', SI_on_z       , s)
        #:aoaomod_kl = aoao_kl - numpy.einsum('g,ij->gij', SI_on_z.conj(), s)
        #:ktmp  = kweight * lib.einsum('gij,jk,g,gkl->il', aoao_ij   , dm, wcoulG, aoao_kl   )
        #:ktmp -= kweight * lib.einsum('gij,jk,g,gkl->il', aoaomod_ij, dm, wcoulG, aoaomod_kl)
        #:ktmp += (madelung - kweight*wcoulG.sum()) * reduce(numpy.dot, (s, dm, s))
        madelung_mod = numpy.einsum('g,g,g', SI_on_z.conj(), wcoulG, SI_on_z)
        tmp_ij = numpy.einsum('gij,g,g->ij', aoao_ij, wcoulG, SI_on_z.conj())
        tmp_kl = numpy.einsum('gij,g,g->ij', aoao_kl, wcoulG, SI_on_z)

        # Scalar prefactor of the s.dm.s term; independent of the density.
        sds_factor = (madelung - kweight * wcoulG.sum()
                      - kweight * madelung_mod)
        vk_is_real = vk.dtype == numpy.double

        for idm, dm in enumerate(dms):
            ktmp = kweight * lib.einsum('ij,jk,kl->il', tmp_ij, dm, s)
            ktmp += kweight * lib.einsum('ij,jk,kl->il', s, dm, tmp_kl)
            ktmp += sds_factor * reduce(numpy.dot, (s, dm, s))
            if vk_is_real:
                vk[idm] += ktmp.real
            else:
                vk[idm] += ktmp

    if kpts is None:
        _contract_(vk, dms, s, aoao_ij[0], aoao_kl[0], 1)

    elif numpy.shape(kpts) == (3, ):
        if kpts_band is None or is_zero(kpts_band - kpts):
            _contract_(vk, dms, s, aoao_ij[0], aoao_kl[0], 1)

    elif kpts_band is None or numpy.array_equal(kpts, kpts_band):
        nkpts = len(kpts)
        weight = 1. / nkpts
        for k in range(nkpts):
            _contract_(vk[:, k], dms[:, k], s[k], aoao_ij[k], aoao_kl[k],
                       weight)

    else:
        nkpts = len(kpts)
        weight = 1. / nkpts
        bands = kpts_band.reshape(-1, 3)
        for k, kpt in enumerate(kpts):
            # every band k-point matching kpt receives this contribution
            for kp in member(kpt, bands):
                _contract_(vk[:, kp], dms[:, k], s[k], aoao_ij[k],
                           aoao_kl[k], weight)
def pw_contract(istep, sh_range, j3cR, j3cI):
    '''Subtract the plane-wave part from the 3-index blocks and store them.

    Closure: ``aosym``, ``cell``, ``ngrids``, ``Gblksize``, ``Gv``, ``b``,
    ``gxyz``, ``Gvbase``, ``kpt``, ``adapted_kptjs``, ``adapted_ji_idx``,
    ``buf``, ``pqkRbuf``, ``pqkIbuf``, ``kLR``, ``kLI``, ``j2c``, ``j2ctag``
    and ``feri`` are taken from the enclosing scope.

    Args:
        istep: index of the shell range; used in the output dataset name.
        sh_range: ``(bstart, bend, ncol)`` describing the AO shell block.
        j3cR: per-k-point real parts, updated in place.
        j3cI: per-k-point imaginary parts, updated in place; not touched for
            entries where both ``kpt`` and the k-point are at gamma.
    '''
    sh_start, sh_end, ncol = sh_range
    if aosym == 's2':
        shls_slice = (sh_start, sh_end, 0, sh_end)
    else:
        shls_slice = (sh_start, sh_end, 0, cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    adapted_kptjs, out=buf)
        nG = p1 - p0
        kLR_t = kLR[p0:p1].T
        kLI_t = kLI[p0:p1].T
        for k, ji in enumerate(adapted_ji_idx):
            pair_ft = dat[k].reshape(nG, ncol)
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = pair_ft.real.T
            pqkI[:] = pair_ft.imag.T

            lib.dot(kLR_t, pqkR.T, -1, j3cR[k], 1)
            lib.dot(kLI_t, pqkI.T, -1, j3cR[k], 1)
            real_only = is_zero(kpt) and gamma_point(adapted_kptjs[k])
            if not real_only:
                lib.dot(kLR_t, pqkI.T, -1, j3cI[k], 1)
                lib.dot(kLI_t, pqkR.T, 1, j3cI[k], 1)

    for k, ji in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = j3cR[k]
        else:
            v = j3cR[k] + j3cI[k] * 1j

        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                              overwrite_b=True)
        else:
            v = lib.dot(j2c, v)
        feri['j3c/%d/%d'%(ji,istep)] = v
def ft_loop(self, cell, gs=None, kpt=numpy.zeros(3), kpts=None, shls_slice=None, max_memory=4000):
    '''
    Fourier transform iterator for all kpti which satisfy  kpt = kpts - kpti

    Yields ``(k, pqkR, pqkI, p0, p1)``: the k-point index, the real and
    imaginary parts of the AO-pair transform reshaped to ``(-1, nG)``, and
    the grid range ``[p0, p1)``.  The yielded arrays are views on buffers
    that are reused by later iterations.
    '''
    if gs is None:
        gs = self.gs
    if kpts is None:
        assert(gamma_point(kpt))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    nao = cell.nao_nr()
    # integer grid indices 0..gs, -gs..-1 along each of the three axes
    axes = tuple(numpy.append(range(n + 1), range(-n, 0))
                 for n in (gs[0], gs[1], gs[2]))
    gxyz = lib.cartesian_prod(axes)
    invh = numpy.linalg.inv(cell._h)
    Gv = 2*numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering the axis-0.  It is inefficient
    if gamma_point(kpt) and gamma_point(kpts):
        aosym = 's1hermi'
    else:
        aosym = 's1'

    nao_pair = nao * nao
    blksize = int(max_memory*.9e6/(nao**2*(nkpts+1)*16))
    blksize = min(max(16, blksize), 16384)
    buf = [numpy.zeros(nao_pair*blksize, dtype=numpy.complex128)
           for k in range(nkpts)]
    pqkRbuf = numpy.empty(nao_pair*blksize)
    pqkIbuf = numpy.empty(nao_pair*blksize)

    for p0, p1 in self.prange(0, ngs, blksize):
        ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh,
                              gxyz[p0:p1], gs, kpt, kpts, out=buf)
        nG = p1 - p0
        for k in range(nkpts):
            aoao = numpy.ndarray((nG, nao, nao), dtype=numpy.complex128,
                                 order='F', buffer=buf[k])
            pqkR = numpy.ndarray((nao, nao, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((nao, nao, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.transpose(1, 2, 0)
            pqkI[:] = aoao.imag.transpose(1, 2, 0)
            yield (k, pqkR.reshape(-1, nG), pqkI.reshape(-1, nG), p0, p1)
            aoao[:] = 0  # == buf[k][:] = 0
def ft_loop(self, mesh=None, q=numpy.zeros(3), kpts=None, shls_slice=None, max_memory=4000, aosym='s1', intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for all kpti which satisfy
        2pi*N = (kpts - kpti - q)*a,  N = -1, 0, 1

    Yields ``(dat, p0, p1)`` where ``dat`` is the value returned by
    ``ft_ao._ft_aopair_kpts`` for the grid range ``[p0, p1)``.  ``dat`` is
    written into a buffer that is reused by every iteration.
    '''
    cell = self.cell
    if mesh is None:
        mesh = self.mesh
    if kpts is None:
        assert (is_zero(q))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # number of AO pairs addressed by shls_slice
    if aosym == 's2':
        assert (shls_slice[2] == 0)
        row0 = ao_loc[shls_slice[0]]
        row1 = ao_loc[shls_slice[1]]
        nij = row1 * (row1 + 1) // 2 - row0 * (row0 + 1) // 2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni * nj

    blksize = int(max_memory * .9e6 / (nij * nkpts * 16 * comp))
    blksize = min(max(16, blksize), ngrids, 16384)
    buf = numpy.empty(nkpts * nij * blksize * comp, dtype=numpy.complex128)

    for p0, p1 in self.prange(0, ngrids, blksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, q, kpts,
                                    intor, comp, out=buf)
        yield dat, p0, p1
def test_ft_aoao_with_kpts(self):
    '''Check AO-pair Fourier transforms against stored fingerprints.'''
    numpy.random.seed(1)
    # Draw order matters: the two k-points first, then the transfer vector.
    kpts = numpy.random.random((2,3))
    Gv = cell.get_Gv([5]*3)
    kpt = numpy.random.random(3)

    dat = ft_ao._ft_aopair_kpts(cell, Gv, kpt=kpt, kptjs=kpts)
    expected = ((2.3753953914129382-2.5365192689115088j),
                (2.4951510097641840-3.1990956672116355j))
    self.assertAlmostEqual(finger(dat[0]), expected[0], 9)
    self.assertAlmostEqual(finger(dat[1]), expected[1], 9)

    dat = ft_ao.ft_aopair(cell, Gv)
    self.assertAlmostEqual(finger(dat), (1.2534723618134684+1.830086071817564j), 9)
def pw_contract(istep, sh_range, j3cR, j3cI):
    '''Subtract the plane-wave part from the 3-index blocks and store them.

    For 1D/2D cells with ``kpt`` equal to zero, the uniform model-charge
    contribution is first removed from the AO-pair transform and from the
    leading ``naux`` rows of ``j3cR``/``j3cI``.

    Closure: ``aosym``, ``cell``, ``ngrids``, ``Gblksize``, ``Gv``, ``b``,
    ``gxyz``, ``Gvbase``, ``kpt``, ``adapted_kptjs``, ``adapted_ji_idx``,
    ``buf``, ``pqkRbuf``, ``pqkIbuf``, ``kLR``, ``kLI``, ``ovlp``, ``wcoulG``,
    ``naux``, ``fuse``, ``fused_cell``, ``j2c``, ``j2ctag`` and ``feri`` come
    from the enclosing scope.

    Args:
        istep: index of the shell range; used in the output dataset name.
        sh_range: ``(bstart, bend, ncol)`` describing the AO shell block.
        j3cR: per-k-point real parts, updated in place.
        j3cI: per-k-point imaginary parts, updated in place; not touched for
            entries where both ``kpt`` and the k-point are at gamma.
    '''
    bstart, bend, ncol = sh_range
    if aosym == 's2':
        shls_slice = (bstart, bend, 0, bend)
    else:
        shls_slice = (bstart, bend, 0, cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    adapted_kptjs, out=buf)

        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
            if SI_on_z.size > 0:
                # aux and vG_mod depend only on the grid block, not on the
                # k-point, so they are evaluated once per block instead of
                # once per k-point.
                aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                      wcoulG[p0:p1][G0idx], SI_on_z)
                aux = None
                for k, aoao in enumerate(dat):
                    aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                    if gamma_point(adapted_kptjs[k]):
                        j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                    else:
                        tmp = vG_mod[:,None] * ovlp[k]
                        j3cR[k][:naux] -= tmp.real
                        j3cI[k][:naux] -= tmp.imag
                        tmp = None
                vG_mod = None

        nG = p1 - p0
        for k, ji in enumerate(adapted_ji_idx):
            aoao = dat[k].reshape(nG,ncol)
            pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # rows below naux hold the part contracted with kLR/kLI
            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
            if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

    for k, ji in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
        else:
            v = lib.dot(j2c, v)
        feri['j3c/%d/%d'%(ji,istep)] = v
def pw_loop(self, cell, gs=None, kpti_kptj=None, shls_slice=None, max_memory=2000):
    '''Plane wave part

    Yields ``(pqkR, pqkI, g0, g1)``: real and imaginary parts of the AO-pair
    transform reshaped to ``(-1, nG)`` for the grid range ``[g0, g1)``.  The
    yielded arrays are views on buffers reused by later iterations.
    '''
    if gs is None:
        gs = self.gs
    if kpti_kptj is None:
        kpti = kptj = numpy.zeros(3)
    else:
        kpti, kptj = kpti_kptj

    nao = cell.nao_nr()
    # integer grid indices 0..gs, -gs..-1 along each of the three axes
    axes = tuple(numpy.append(range(n + 1), range(-n, 0))
                 for n in (gs[0], gs[1], gs[2]))
    gxyz = lib.cartesian_prod(axes)
    invh = numpy.linalg.inv(cell._h)
    Gv = 2*numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering the axis-0.  It is inefficient
    if gamma_point(kpti) and gamma_point(kptj):
        aosym = 's1hermi'
    else:
        aosym = 's1'

    blksize = int(max_memory*1e6*.75/16/nao**2)
    blksize = min(max(16, blksize), 16384)
    sublk = max(16, int(blksize//4))
    nao_pair = nao * nao
    buf = [numpy.zeros(nao_pair*blksize, dtype=numpy.complex128)]
    pqkRbuf = numpy.empty(nao_pair*sublk)
    pqkIbuf = numpy.empty(nao_pair*sublk)

    for p0, p1 in self.prange(0, ngs, blksize):
        aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh,
                                     gxyz[p0:p1], gs, kptj-kpti,
                                     kptj.reshape(1,3), out=buf)[0]
        # hand the block out in sub-blocks of at most sublk grid points
        for i0, i1 in lib.prange(0, p1-p0, sublk):
            nG = i1 - i0
            chunk = aoao[i0:i1]
            pqkR = numpy.ndarray((nao, nao, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((nao, nao, nG), buffer=pqkIbuf)
            pqkR[:] = chunk.real.transpose(1, 2, 0)
            pqkI[:] = chunk.imag.transpose(1, 2, 0)
            yield (pqkR.reshape(-1, nG), pqkI.reshape(-1, nG), p0+i0, p0+i1)
        aoao[:] = 0
def ft_loop(self, mesh=None, q=numpy.zeros(3), kpts=None, shls_slice=None, max_memory=4000, aosym='s1', intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for all kpti which satisfy
        2pi*N = (kpts - kpti - q)*a,  N = -1, 0, 1

    Yields ``(dat, p0, p1)`` where ``dat`` is the value returned by
    ``ft_ao._ft_aopair_kpts`` for the grid range ``[p0, p1)``.  ``dat`` is
    written into a buffer that is reused by every iteration.
    '''
    cell = self.cell
    if mesh is None:
        mesh = self.mesh
    if kpts is None:
        assert(is_zero(q))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # number of AO pairs addressed by shls_slice
    if aosym == 's2':
        assert(shls_slice[2] == 0)
        row0 = ao_loc[shls_slice[0]]
        row1 = ao_loc[shls_slice[1]]
        nij = row1*(row1+1)//2 - row0*(row0+1)//2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni*nj

    blksize = int(max_memory*.9e6/(nij*nkpts*16*comp))
    blksize = min(max(16, blksize), ngrids, 16384)
    buf = numpy.empty(nkpts*nij*blksize*comp, dtype=numpy.complex128)

    for p0, p1 in self.prange(0, ngrids, blksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, q, kpts,
                                    intor, comp, out=buf)
        yield dat, p0, p1
def pw_contract(istep, sh_range, j3cR, j3cI):
    '''Subtract the plane-wave part from the 3-index blocks and store them.

    Closure: ``aosym``, ``cell``, ``ngrids``, ``Gblksize``, ``Gv``, ``b``,
    ``gxyz``, ``Gvbase``, ``kpt``, ``adapted_kptjs``, ``adapted_ji_idx``,
    ``buf``, ``pqkRbuf``, ``pqkIbuf``, ``kLR``, ``kLI``, ``naux``, ``fuse``,
    ``j2c``, ``j2c_negative``, ``j2ctag`` and ``feri`` come from the
    enclosing scope.

    Args:
        istep: index of the shell range; used in the output dataset names.
        sh_range: ``(bstart, bend, ncol)`` describing the AO shell block.
        j3cR: per-k-point real parts, updated in place.
        j3cI: per-k-point imaginary parts, updated in place; not touched for
            entries where both ``kpt`` and the k-point are at gamma.
    '''
    sh_start, sh_end, ncol = sh_range
    if aosym == 's2':
        shls_slice = (sh_start, sh_end, 0, sh_end)
    else:
        shls_slice = (sh_start, sh_end, 0, cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    adapted_kptjs, out=buf)
        nG = p1 - p0
        kLR_t = kLR[p0:p1].T
        kLI_t = kLI[p0:p1].T
        for k, ji in enumerate(adapted_ji_idx):
            pair_ft = dat[k].reshape(nG, ncol)
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = pair_ft.real.T
            pqkI[:] = pair_ft.imag.T

            # rows below naux hold the part contracted with kLR/kLI
            lib.dot(kLR_t, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI_t, pqkI.T, -1, j3cR[k][naux:], 1)
            real_only = is_zero(kpt) and gamma_point(adapted_kptjs[k])
            if not real_only:
                lib.dot(kLR_t, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI_t, pqkR.T, 1, j3cI[k][naux:], 1)

    for k, ji in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)

        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                              overwrite_b=True)
            feri['j3c/%d/%d'%(ji,istep)] = v
        else:
            feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

        # low-dimension systems
        if j2c_negative is not None:
            feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Build and save the 3-index blocks for one unique ``kptj - kpti``.

    Closure: reads ``uniq_kpts``, ``uniq_inverse``, ``kptjs``, ``kLRs``,
    ``kLIs``, ``nao``, ``naux``, ``ngs``, ``Gv``, ``gxyz``, ``gs``, ``invh``,
    ``j2c``, ``Lpq_fake``, ``feri``, ``mydf``, ``cell``, ``fused_cell``,
    ``fuse``, ``save`` and ``log`` from the enclosing scope.  Results are
    written through ``save``; nothing is returned.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1("kpt = %s", kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1("adapted_ji_idx = %s", adapted_ji_idx)
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        aosym = "s2"
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor("cint1e_ovlp_sph", hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = "s1"
        nao_pair = nao ** 2

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = int(max_memory * 0.6 * 1e6 / 16 / naux / (nkptj + 1))
    buflen = min(max(buflen, 1), nao_pair)
    shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    mem_frac = 0.2 if aosym == "s2" else 0.4
    Gblksize = max(16, int(max_memory * mem_frac * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1("int3c2e [%d/%d], AO [%d:%d], ncol = %d",
                   istep + 1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1 + ncol

        # Load the stored blocks and split them into real/imaginary parts.
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = fuse(numpy.asarray(feri["j3c/%d" % idx][:, col0:col1]))

            if mydf.approx_sr_level == 0:
                Lpq = numpy.asarray(feri["Lpq/%d" % idx][:, col0:col1])
            elif aosym == "s2":
                Lpq = numpy.asarray(feri["Lpq/0"][:, col0:col1])
            else:
                Lpq = numpy.asarray(Lpq_fake[:, col0:col1])
            lib.dot(j2c[uniq_kptji_id], Lpq, -0.5, v, 1)

            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]

            j3cR.append(numpy.asarray(v.real, order="C"))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order="C"))
            v = Lpq = None

        if aosym == "s2":
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128,
                                         order="F", buffer=buf[k])
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    aoao[:] = 0  # clear this part of buf before it is reused
                    lib.dot(kLR_t, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI_t, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR_t, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI_t, pqkR.T, 1, j3cI[k], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128,
                                         order="F", buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    aoao[:] = 0  # clear this part of buf before it is reused
                    pqkR = pqkR.reshape(-1, nG)
                    pqkI = pqkI.reshape(-1, nG)
                    zdotCN(kLR_t, kLI_t, pqkR.T, pqkI.T,
                           -1, j3cR[k], j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                save("j3c/%d" % ji, j3cR[k], col0, col1)
            else:
                save("j3c/%d" % ji, j3cR[k] + j3cI[k] * 1j, col0, col1)
def pw_loop(self, mesh=None, kpti_kptj=None, q=None, shls_slice=None, max_memory=2000, aosym='s1', blksize=None, intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for AO pair

    Args:
        mesh: grid mesh; defaults to ``self.mesh``.
        kpti_kptj: pair ``(kpti, kptj)``; both default to the zero vector.
        q: transfer vector; defaults to ``kptj - kpti``.
        shls_slice: ``(i0, i1, j0, j1)`` shell ranges; defaults to all shells.
        max_memory: memory budget used to pick ``blksize`` when it is None.
        aosym: ``'s2'`` for packed pairs (requires ``shls_slice[2] == 0``),
            anything else is treated as the full ``ni*nj`` pair set.
        blksize: number of grid points per block.  When given, it is also
            used as the sub-block size.
        intor: integral name forwarded to ``ft_ao._ft_aopair_kpts``.
        comp: number of integral components.

    Yields:
        ``(pqkR, pqkI, g0, g1)``: real and imaginary parts for the grid range
        ``[g0, g1)``, shaped ``(nij, nG)`` when ``comp == 1`` and
        ``(comp, nij, nG)`` otherwise.  The arrays are views on buffers that
        are reused by later iterations.
    '''
    cell = self.cell
    if mesh is None:
        mesh = self.mesh
    if kpti_kptj is None:
        kpti = kptj = numpy.zeros(3)
    else:
        kpti, kptj = kpti_kptj
    if q is None:
        q = kptj - kpti

    ao_loc = cell.ao_loc_nr()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)
    if aosym == 's2':
        assert (shls_slice[2] == 0)
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        nij = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni * nj

    if blksize is None:
        blksize = min(
            max(64, int(max_memory * 1e6 * .75 / (nij * 16 * comp))), 16384)
        sublk = int(blksize // 4)
    else:
        sublk = blksize
    buf = numpy.empty(nij * blksize * comp, dtype=numpy.complex128)
    pqkRbuf = numpy.empty(nij * sublk * comp)
    pqkIbuf = numpy.empty(nij * sublk * comp)

    for p0, p1 in self.prange(0, ngrids, blksize):
        aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                     b, gxyz[p0:p1], Gvbase, q,
                                     kptj.reshape(1, 3), intor, comp,
                                     out=buf)[0]
        if comp == 1:
            aoao = aoao.reshape(p1 - p0, nij)
        else:
            # The previous code reshaped to 2-D here and then applied
            # transpose(0, 2, 1), which cannot work for comp > 1.
            # NOTE(review): assumes the component axis comes first in the
            # returned data, i.e. (comp, nG, nij); this matches the
            # (comp, nij, nG) target of the transpose below.  Confirm against
            # ft_ao._ft_aopair_kpts.
            aoao = aoao.reshape(comp, p1 - p0, nij)

        for i0, i1 in lib.prange(0, p1 - p0, sublk):
            nG = i1 - i0
            if comp == 1:
                pqkR = numpy.ndarray((nij, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((nij, nG), buffer=pqkIbuf)
                pqkR[:] = aoao[i0:i1].real.T
                pqkI[:] = aoao[i0:i1].imag.T
            else:
                pqkR = numpy.ndarray((comp, nij, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((comp, nij, nG), buffer=pqkIbuf)
                # slice along the grid axis, keep every component
                pqkR[:] = aoao[:, i0:i1].real.transpose(0, 2, 1)
                pqkI[:] = aoao[:, i0:i1].imag.transpose(0, 2, 1)
            yield (pqkR, pqkI, p0 + i0, p0 + i1)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Build the 3-index blocks for one unique ``kptj - kpti`` in ``feri``.

    Closure: reads ``uniq_kpts``, ``uniq_inverse``, ``kptjs``, ``kLRs``,
    ``kLIs``, ``nao``, ``naux``, ``nauxs``, ``ngs``, ``Gv``, ``Gvbase``,
    ``gxyz``, ``b``, ``j2c``, ``feri``, ``mydf``, ``cell``, ``fused_cell``,
    ``fuse`` and ``log`` from the enclosing scope.  The ``j3c/<ji>`` datasets
    are updated block by block and finally replaced by their first
    ``nauxs[uniq_kptji_id]`` rows.  Nothing is returned.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1))
    buflen = min(max(buflen, 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    mem_frac = .2 if aosym == 's2' else .4
    Gblksize = max(16, int(max_memory * mem_frac * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs, 16384)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    naux0 = nauxs[uniq_kptji_id]
    j2c_tag, j2c_mat = j2c[uniq_kptji_id][0], j2c[uniq_kptji_id][1]

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1 + ncol

        # Load the stored blocks and split them into real/imaginary parts.
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
        v = None

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      b, gxyz[p0:p1], Gvbase, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    aoao[:] = 0  # clear this part of buf before it is reused
                    lib.dot(kLR_t, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI_t, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR_t, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI_t, pqkR.T, 1, j3cI[k][naux:], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      b, gxyz[p0:p1], Gvbase, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    aoao[:] = 0  # clear this part of buf before it is reused
                    pqkR = pqkR.reshape(-1, nG)
                    pqkI = pqkI.reshape(-1, nG)
                    zdotCN(kLR_t, kLI_t, pqkR.T, pqkI.T,
                           -1, j3cR[k][naux:], j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2c_tag == 'CD':
                v = scipy.linalg.solve_triangular(j2c_mat, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c_mat, v)
            feri['j3c/%d' % ji][:naux0, col0:col1] = v

    # Replace each dataset by its first naux0 rows.
    for k, ji in enumerate(adapted_ji_idx):
        key = 'j3c/%d' % ji
        v = feri[key][:naux0]
        del (feri[key])
        feri[key] = v
def pw_loop(self, gs=None, kpti_kptj=None, q=None, shls_slice=None, max_memory=2000, aosym='s1', blksize=None):
    '''Plane wave part

    Yields ``(pqkR, pqkI, g0, g1)``: real and imaginary parts of the AO-pair
    transform, each shaped ``(nij, nG)``, for the grid range ``[g0, g1)``.
    The arrays are views on buffers reused by later iterations.  When
    ``blksize`` is given it is also used as the sub-block size.
    '''
    cell = self.cell
    if gs is None:
        gs = self.gs
    if kpti_kptj is None:
        kpti = kptj = numpy.zeros(3)
    else:
        kpti, kptj = kpti_kptj
    if q is None:
        q = kptj - kpti

    ao_loc = cell.ao_loc_nr()
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # number of AO pairs addressed by shls_slice
    if aosym == 's2':
        assert (shls_slice[2] == 0)
        row0 = ao_loc[shls_slice[0]]
        row1 = ao_loc[shls_slice[1]]
        nij = row1 * (row1 + 1) // 2 - row0 * (row0 + 1) // 2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni * nj

    if blksize is None:
        blksize = int(max_memory * 1e6 * .75 / 16 / nij)
        blksize = min(max(16, blksize), 16384)
        sublk = max(16, int(blksize // 4))
    else:
        sublk = blksize
    buf = numpy.empty(nij * blksize, dtype=numpy.complex128)
    pqkRbuf = numpy.empty(nij * sublk)
    pqkIbuf = numpy.empty(nij * sublk)

    for p0, p1 in self.prange(0, ngs, blksize):
        aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                     b, gxyz[p0:p1], Gvbase, q,
                                     kptj.reshape(1, 3), out=buf)[0]
        aoao = aoao.reshape(p1 - p0, nij)

        # hand the block out in sub-blocks of at most sublk grid points
        for i0, i1 in lib.prange(0, p1 - p0, sublk):
            nG = i1 - i0
            chunk = aoao[i0:i1]
            pqkR = numpy.ndarray((nij, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((nij, nG), buffer=pqkIbuf)
            pqkR[:] = chunk.real.T
            pqkI[:] = chunk.imag.T
            yield (pqkR, pqkI, p0 + i0, p0 + i1)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Build the 3-index blocks for one unique ``kptj - kpti`` in ``feri``.

    Closure: reads ``uniq_kpts``, ``uniq_inverse``, ``kptjs``, ``kLRs``,
    ``kLIs``, ``nao``, ``naux``, ``ngs``, ``Gv``, ``Gvbase``, ``gxyz``, ``b``,
    ``j2c``, ``feri``, ``mydf``, ``cell``, ``fused_cell``, ``fuse`` and
    ``log`` from the enclosing scope.  The first ``naux`` rows of each
    ``j3c/<ji>`` dataset are overwritten block by block.  Nothing is returned.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = int(max_memory*.6*1e6/16/naux/(nkptj+1))
    buflen = min(max(buflen, 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    mem_frac = .2 if aosym == 's2' else .4
    Gblksize = max(16, int(max_memory*mem_frac*1e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngs, 16384)
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.zeros((nkptj, buflen*Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1+ncol

        # Load the stored blocks and split them into real/imaginary parts.
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      b, gxyz[p0:p1], Gvbase, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    aoao[:] = 0  # clear this part of buf before it is reused
                    lib.dot(kLR_t, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI_t, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR_t, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI_t, pqkR.T, 1, j3cI[k][naux:], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      b, gxyz[p0:p1], Gvbase, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                kLR_t = kLR[p0:p1].T
                kLI_t = kLI[p0:p1].T
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    aoao[:] = 0  # clear this part of buf before it is reused
                    pqkR = pqkR.reshape(-1, nG)
                    pqkI = pqkI.reshape(-1, nG)
                    zdotCN(kLR_t, kLI_t, pqkR.T, pqkI.T,
                           -1, j3cR[k][naux:], j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                              lower=True, overwrite_b=True)
            feri['j3c/%d'%ji][:naux,col0:col1] = v
def ft_loop(self, mesh=None, q=numpy.zeros(3), kpts=None, shls_slice=None, max_memory=4000, aosym='s1', intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for all kpti which satisfy
        2pi*N = (kpts - kpti - q)*a,  N = -1, 0, 1

    Yields ``(dat, p0, p1)`` where ``dat`` is the value returned by
    ``ft_ao._ft_aopair_kpts`` for the grid range ``[p0, p1)``.  For 1D/2D
    cells with ``q`` close to zero and an overlap-type ``intor``, the
    overlap-weighted model-charge term is subtracted from ``dat`` first.
    ``dat`` is written into a buffer reused by every iteration.
    '''
    cell = self.cell
    if mesh is None:
        mesh = self.mesh
    if kpts is None:
        assert (is_zero(q))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # number of AO pairs addressed by shls_slice
    if aosym == 's2':
        assert (shls_slice[2] == 0)
        row0 = ao_loc[shls_slice[0]]
        row1 = ao_loc[shls_slice[1]]
        nij = row1 * (row1 + 1) // 2 - row0 * (row0 + 1) // 2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni * nj

    needs_correction = (abs(q).sum() < 1e-6 and
                        intor[:11] == 'GTO_ft_ovlp' and
                        (cell.dimension == 1 or cell.dimension == 2))
    if needs_correction:
        s = cell.pbc_intor('int1e_ovlp', kpts=kpts)
        if aosym == 's2':
            s = [lib.pack_tril(x) for x in s]
    else:
        s = None

    blksize = int(max_memory * .9e6 / (nij * nkpts * 16 * comp))
    blksize = min(max(16, blksize), ngrids, 16384)
    buf = numpy.empty(nkpts * nij * blksize * comp, dtype=numpy.complex128)

    for p0, p1 in self.prange(0, ngrids, blksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, q, kpts,
                                    intor, comp, out=buf)

        if s is not None:  # to remove the divergent part in 1D/2D systems
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(
                cell, Gv[p0:p1])
            if SI_on_z.size > 0:
                for k, kpt in enumerate(kpts):
                    dat[k][G0idx] -= numpy.einsum('g,...->g...', SI_on_z, s[k])

        yield dat, p0, p1
def ft_loop(self, cell, gs=None, kpt=numpy.zeros(3), kpts=None, shls_slice=None, max_memory=4000):
    '''
    Fourier transform iterator for all kpti which satisfy  kpt = kpts - kpti

    Yields ``(k, pqkR, pqkI, p0, p1)``: the k-point index, the real and
    imaginary parts of the AO-pair transform reshaped to ``(-1, nG)``, and
    the grid range ``[p0, p1)``.  The yielded arrays are views on buffers
    that are reused by later iterations.
    '''
    if gs is None:
        gs = self.gs
    if kpts is None:
        assert (gamma_point(kpt))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    nao = cell.nao_nr()
    # integer grid indices 0..gs, -gs..-1 along each of the three axes
    axes = tuple(numpy.append(range(n + 1), range(-n, 0))
                 for n in (gs[0], gs[1], gs[2]))
    gxyz = lib.cartesian_prod(axes)
    invh = numpy.linalg.inv(cell._h)
    Gv = 2 * numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering the axis-0.  It is inefficient
    if gamma_point(kpt) and gamma_point(kpts):
        aosym = 's1hermi'
    else:
        aosym = 's1'

    nao_pair = nao * nao
    blksize = int(max_memory * .9e6 / (nao**2 * (nkpts + 1) * 16))
    blksize = min(max(16, blksize), 16384)
    buf = [numpy.zeros(nao_pair * blksize, dtype=numpy.complex128)
           for k in range(nkpts)]
    pqkRbuf = numpy.empty(nao_pair * blksize)
    pqkIbuf = numpy.empty(nao_pair * blksize)

    for p0, p1 in self.prange(0, ngs, blksize):
        ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh,
                              gxyz[p0:p1], gs, kpt, kpts, out=buf)
        nG = p1 - p0
        for k in range(nkpts):
            aoao = numpy.ndarray((nG, nao, nao), dtype=numpy.complex128,
                                 order='F', buffer=buf[k])
            pqkR = numpy.ndarray((nao, nao, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((nao, nao, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.transpose(1, 2, 0)
            pqkI[:] = aoao.imag.transpose(1, 2, 0)
            yield (k, pqkR.reshape(-1, nG), pqkI.reshape(-1, nG), p0, p1)
            aoao[:] = 0  # == buf[k][:] = 0
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Build and store the 3-index tensor blocks for one unique kptj - kpti.

    For every k-point pair mapped to uniq_kptji_id, the raw integrals read
    from fswap['j3c-junk/...'] are corrected with the Fourier-space term,
    fused, contracted with the decomposed metric and written to
    feri['j3c/<ji>/<istep>'] (plus feri['j3c-/...'] when a negative metric
    part is supplied).  The 'j3c-junk' entries are deleted at the end.

    Relies on names from the enclosing scope (uniq_kpts, uniq_inverse,
    kptjs, fswap, feri, fuse, Gv, gxyz, Gvbase, b, nao, naux, nsegs, mesh,
    ngrids, cell, auxcell, fused_cell, mydf, log).

    Args:
        uniq_kptji_id: index into uniq_kpts.
        cholesky_j2c: tuple (j2c, j2c_negative, j2ctag); j2ctag == 'CD'
            selects a triangular solve, anything else a matrix product.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    log.debug1('kpt = %s', kpt)
    # All (kpti, kptj) pairs whose difference maps onto this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # FT of the shells [auxcell.nbas, fused_cell.nbas) of fused_cell,
    # weighted by the Coulomb kernel and split into real/imaginary parts.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None  # release the complex array

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        # vbar/ovlp are only defined (and only used in load) for 3D cells.
        if cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Largest third entry (column count) over all shell ranges.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)

    def load(aux_slice):
        '''Read columns [col0:col1] of the raw integrals for every pair.

        Returns (j3cR, j3cI): lists of C-ordered real and imaginary parts;
        the imaginary entry is None when kpt is zero and kptj is gamma.
        '''
        col0, col1 = aux_slice
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            # Stack the nsegs segments stored under 'j3c-junk/<idx>/<i>'.
            v = numpy.vstack([
                fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                for i in range(nsegs)
            ])
            # vbar is the interaction between the background charge
            # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
            if is_zero(kpt) and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
        return j3cR, j3cI

    # Scratch space reused for every grid block and every k-point.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
    # Column offsets of each shell range: (start, stop) pairs fed to load.
    cols = [sh_range[2] for sh_range in shranges]
    locs = numpy.append(0, numpy.cumsum(cols))
    tasks = zip(locs[:-1], locs[1:])
    # NOTE(review): map_with_prefetch presumably overlaps load() of the next
    # task with the computation below -- confirm against lib.
    for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
        bstart, bend, ncol = shranges[istep]
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1, len(shranges), bstart, bend, ncol)
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # presumably lib.dot(a, b, alpha, c, beta) updates c in place
                # as alpha*a.dot(b) + beta*c -- TODO confirm.  Under that
                # reading rows [naux:] lose conj(Gaux).T . aoao.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            # j3cI[k] is None in the real (gamma-point) case, see load().
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d' % (ji, istep)] = v
            else:
                feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
        j3cR = j3cI = None

    # The raw integrals are no longer needed once every block is written.
    for ji in adapted_ji_idx:
        del (fswap['j3c-junk/%d' % ji])
def ft_loop(self, gs=None, kpt=numpy.zeros(3), kpts=None, shls_slice=None,
            max_memory=4000, aosym='s1'):
    '''Generate real/imaginary AO-pair Fourier transforms block by block.

    Fourier transform iterator for all kpti which satisfy
        kpt = kpts - kpti

    gs and kpts default to self.gs and self.kpts; the kpts default is only
    accepted when kpt is the gamma point.

    Yields:
        (k, pqkR, pqkI, p0, p1): k-point index, real and imaginary parts as
        (nij, nG) arrays, and the grid range [p0:p1].  The arrays are views
        of scratch buffers overwritten by the next iteration.
    '''
    cell = self.cell
    gs = self.gs if gs is None else gs
    if kpts is None:
        assert(gamma_point(kpt))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    gxyz = lib.cartesian_prod([numpy.arange(len(axis)) for axis in Gvbase])
    ngs = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    # pair_shape is the AO-pair part of the transform's shape; pair_first
    # is the axis permutation that moves the grid axis to the end.
    if aosym == 's2':
        assert(shls_slice[2] == 0)
        lo = ao_loc[shls_slice[0]]
        hi = ao_loc[shls_slice[1]]
        nij = hi*(hi+1)//2 - lo*(lo+1)//2
        pair_shape = (nij,)
        pair_first = (1, 0)
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni*nj
        pair_shape = (ni, nj)
        pair_first = (1, 2, 0)

    blksize = min(max(16, int(max_memory*.9e6/(nij*(nkpts+1)*16))),
                  ngs, 16384)
    buf = [numpy.zeros(nij*blksize, dtype='D') for k in range(nkpts)]
    real_scratch = numpy.empty(nij*blksize)
    imag_scratch = numpy.empty(nij*blksize)

    for p0, p1 in self.prange(0, ngs, blksize):
        ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                              gxyz[p0:p1], Gvbase, kpt, kpts, out=buf)
        nG = p1 - p0
        for k in range(nkpts):
            aoao = numpy.ndarray((nG,) + pair_shape,
                                 dtype=numpy.complex128,
                                 order='F', buffer=buf[k])
            pqkR = numpy.ndarray(pair_shape + (nG,), buffer=real_scratch)
            pqkI = numpy.ndarray(pair_shape + (nG,), buffer=imag_scratch)
            pqkR[:] = aoao.real.transpose(pair_first)
            pqkI[:] = aoao.imag.transpose(pair_first)
            yield (k, pqkR.reshape(-1,nG), pqkI.reshape(-1,nG), p0, p1)
            aoao[:] = 0  # == buf[k][:] = 0
def pw_loop(self, gs=None, kpti_kptj=None, shls_slice=None,
            max_memory=2000, aosym='s1', blksize=None):
    '''Plane wave part

    Iterate over the Fourier transform of AO pairs for a single k-point
    pair, split into real and imaginary parts.

    Args:
        gs: grid specification passed to cell.get_Gv_weights; defaults to
            self.gs.
        kpti_kptj: pair (kpti, kptj); both default to the zero vector.
        shls_slice: shell range (i0, i1, j0, j1); defaults to all shells.
        max_memory: budget used to pick blksize when it is not given.
        aosym: 's2' for packed pairs (requires shls_slice[2] == 0),
            anything else for the full ni*nj block.
        blksize: number of grid points transformed per call.  When given,
            it is also used as the size of the yielded sub-blocks.

    Yields:
        (pqkR, pqkI, g0, g1): real and imaginary parts as (nij, nG) arrays
        and the global grid range [g0:g1] they cover.  The arrays are views
        of scratch buffers overwritten by the next iteration.
    '''
    cell = self.cell
    if gs is None:
        gs = self.gs
    if kpti_kptj is None:
        kpti = kptj = numpy.zeros(3)
    else:
        kpti, kptj = kpti_kptj

    ao_loc = cell.ao_loc_nr()
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)
    if aosym == 's2':
        assert(shls_slice[2] == 0)
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        nij = i1*(i1+1)//2 - i0*(i0+1)//2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni*nj

    if blksize is None:
        blksize = min(max(16, int(max_memory*1e6*.75/16/nij)), 16384)
        sublk = max(16, int(blksize//4))
    else:
        # Fix: this branch used to assign a misspelled name ("subblk"),
        # leaving sublk undefined and raising NameError below whenever the
        # caller supplied blksize.
        sublk = blksize
    buf = [numpy.zeros(nij*blksize, dtype=numpy.complex128)]
    pqkRbuf = numpy.empty(nij*sublk)
    pqkIbuf = numpy.empty(nij*sublk)

    if aosym == 's2':
        for p0, p1 in self.prange(0, ngs, blksize):
            aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                         b, gxyz[p0:p1], Gvbase,
                                         kptj-kpti, kptj.reshape(1,3),
                                         out=buf)[0]
            # Hand the block out in sub-blocks of at most sublk grid points.
            for i0, i1 in lib.prange(0, p1-p0, sublk):
                nG = i1 - i0
                pqkR = numpy.ndarray((nij,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((nij,nG), buffer=pqkIbuf)
                pqkR[:] = aoao[i0:i1].real.T
                pqkI[:] = aoao[i0:i1].imag.T
                yield (pqkR, pqkI, p0+i0, p0+i1)
            # Reset the transform buffer before it is reused.
            aoao[:] = 0
    else:
        for p0, p1 in self.prange(0, ngs, blksize):
            aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                         b, gxyz[p0:p1], Gvbase,
                                         kptj-kpti, kptj.reshape(1,3),
                                         out=buf)[0]
            for i0, i1 in lib.prange(0, p1-p0, sublk):
                nG = i1 - i0
                pqkR = numpy.ndarray((ni,nj,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ni,nj,nG), buffer=pqkIbuf)
                pqkR[:] = aoao[i0:i1].real.transpose(1,2,0)
                pqkI[:] = aoao[i0:i1].imag.transpose(1,2,0)
                yield (pqkR.reshape(-1,nG), pqkI.reshape(-1,nG),
                       p0+i0, p0+i1)
            aoao[:] = 0
def ft_loop(self, gs=None, q=numpy.zeros(3), kpts=None, shls_slice=None,
            max_memory=4000, aosym='s1'):
    '''Generate real/imaginary AO-pair Fourier transforms block by block.

    Fourier transform iterator for all kpti which satisfy
        2pi*N = (kpts - kpti - q)*a,  N = -1, 0, 1

    gs and kpts default to self.gs and self.kpts; the kpts default is only
    accepted when q is zero.

    Yields:
        (k, pqkR, pqkI, p0, p1): k-point index, real and imaginary parts as
        (nij, nG) arrays, and the grid range [p0:p1].  The arrays are views
        of scratch buffers overwritten by the next iteration.
    '''
    cell = self.cell
    if gs is None:
        gs = self.gs
    if kpts is None:
        assert (is_zero(q))
        kpts = self.kpts
    kpts = numpy.asarray(kpts)
    nkpts = len(kpts)

    ao_loc = cell.ao_loc_nr()
    b = cell.reciprocal_vectors()
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    gxyz = lib.cartesian_prod([numpy.arange(len(axis)) for axis in Gvbase])
    ngs = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)

    packed = aosym == 's2'
    if packed:
        assert (shls_slice[2] == 0)
        first = ao_loc[shls_slice[0]]
        last = ao_loc[shls_slice[1]]
        nij = last * (last + 1) // 2 - first * (first + 1) // 2
        # Packed pairs: transform is (nG, nij), output is (nij, nG).
        pair_dims = (nij,)
        grid_last = (1, 0)
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni * nj
        # Full pairs: transform is (nG, ni, nj), output is (ni, nj, nG).
        pair_dims = (ni, nj)
        grid_last = (1, 2, 0)

    blksize = max(16, int(max_memory * .9e6 / (nij * (nkpts + 1) * 16)))
    blksize = min(blksize, ngs, 16384)
    buf = [numpy.zeros(nij * blksize, dtype='D') for k in range(nkpts)]
    re_store = numpy.empty(nij * blksize)
    im_store = numpy.empty(nij * blksize)

    for p0, p1 in self.prange(0, ngs, blksize):
        ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                              gxyz[p0:p1], Gvbase, q, kpts, out=buf)
        nG = p1 - p0
        ft_shape = (nG,) + pair_dims
        out_shape = pair_dims + (nG,)
        for k in range(nkpts):
            aoao = numpy.ndarray(ft_shape, dtype=numpy.complex128,
                                 order='F', buffer=buf[k])
            pqkR = numpy.ndarray(out_shape, buffer=re_store)
            pqkI = numpy.ndarray(out_shape, buffer=im_store)
            pqkR[:] = aoao.real.transpose(grid_last)
            pqkI[:] = aoao.imag.transpose(grid_last)
            yield (k, pqkR.reshape(-1, nG), pqkI.reshape(-1, nG), p0, p1)
            aoao[:] = 0  # == buf[k][:] = 0
def pw_loop(self, mesh=None, kpti_kptj=None, q=None, shls_slice=None,
            max_memory=2000, aosym='s1', blksize=None,
            intor='GTO_ft_ovlp', comp=1):
    '''
    Fourier transform iterator for AO pair

    mesh defaults to self.mesh, the k-point pair to two zero vectors and q
    to kptj - kpti.  When blksize is None it is derived from max_memory and
    results are handed out in sub-blocks of blksize//4 grid points;
    otherwise the sub-block size equals blksize.

    Yields:
        (pqkR, pqkI, g0, g1): real and imaginary parts and the global grid
        range [g0:g1] they cover.  The arrays are views of scratch buffers
        overwritten by the next iteration.
    '''
    cell = self.cell
    mesh = self.mesh if mesh is None else mesh
    if kpti_kptj is None:
        kpti = kptj = numpy.zeros(3)
    else:
        kpti, kptj = kpti_kptj
    if q is None:
        q = kptj - kpti

    ao_loc = cell.ao_loc_nr()
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(axis)) for axis in Gvbase])
    ngrids = gxyz.shape[0]

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas)
    if aosym == 's2':
        assert(shls_slice[2] == 0)
        lo = ao_loc[shls_slice[0]]
        hi = ao_loc[shls_slice[1]]
        nij = hi*(hi+1)//2 - lo*(lo+1)//2
    else:
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nij = ni*nj

    derive_blksize = blksize is None
    if derive_blksize:
        blksize = min(max(64, int(max_memory*1e6*.75/(nij*16*comp))), 16384)
    sublk = int(blksize//4) if derive_blksize else blksize

    buf = numpy.empty(nij*blksize*comp, dtype=numpy.complex128)
    real_scratch = numpy.empty(nij*sublk*comp)
    imag_scratch = numpy.empty(nij*sublk*comp)

    kptj_row = kptj.reshape(1,3)
    for p0, p1 in self.prange(0, ngrids, blksize):
        aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                     b, gxyz[p0:p1], Gvbase, q,
                                     kptj_row, intor, comp, out=buf)[0]
        aoao = aoao.reshape(p1-p0,nij)
        for g0, g1 in lib.prange(0, p1-p0, sublk):
            nG = g1 - g0
            if comp == 1:
                out_shape, axes = (nij,nG), (1,0)
            else:
                out_shape, axes = (comp,nij,nG), (0,2,1)
            pqkR = numpy.ndarray(out_shape, buffer=real_scratch)
            pqkI = numpy.ndarray(out_shape, buffer=imag_scratch)
            pqkR[:] = aoao[g0:g1].real.transpose(axes)
            pqkI[:] = aoao[g0:g1].imag.transpose(axes)
            yield (pqkR, pqkI, p0+g0, p0+g1)
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
    '''Apply the Fourier-space correction to one chunk and fuse it.

    Reads feri['j3c-chunks/<job_id>/<idx>'] for every k-point pair mapped
    to uniq_kptji_id, subtracts the plane-wave contribution for the AO
    shells [sh0, sh1), contracts with the stored metric factor
    feri['j2c/<uniq_kptji_id>'] and writes the first naux0 rows back into
    the same dataset.

    Relies on names from the enclosing scope (uniq_kpts, uniq_inverse,
    kptjs, feri, j2ctags, fuse, vbar, ovlp, ao_loc, Gv, gxyz, Gvbase, b,
    gs, ngs, naux, max_memory, cell, auxcell, fused_cell, mydf, log).
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    # All (kpti, kptj) pairs whose difference maps onto this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)

    # FT of the shells [auxcell.nbas, fused_cell.nbas) of fused_cell,
    # weighted by the Coulomb kernel and split into real/imaginary parts.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
    j2ctag = j2ctags[uniq_kptji_id]
    naux0 = j2c.shape[0]

    if is_zero(kpt):
        aosym = 's2'
    else:
        aosym = 's1'

    j3cR = [None] * nkptj
    j3cI = [None] * nkptj
    i0 = ao_loc[sh0]
    i1 = ao_loc[sh1]
    for k, idx in enumerate(adapted_ji_idx):
        key = 'j3c-chunks/%d/%d' % (job_id, idx)
        v = numpy.asarray(feri[key])
        if is_zero(kpt):
            # Subtract vbar[i] * overlap over the packed lower-triangular
            # range that corresponds to AO rows [i0, i1).
            for i, c in enumerate(vbar):
                if c != 0:
                    v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel()
        j3cR[k] = numpy.asarray(v.real, order='C')
        # j3cI[k] stays None when the stored chunk is not complex128.
        if v.dtype == numpy.complex128:
            j3cI[k] = numpy.asarray(v.imag, order='C')
        v = None

    ncol = j3cR[0].shape[1]
    Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
    Gblksize = min(Gblksize, ngs, 16384)
    # Scratch space reused for every grid block and every k-point.
    pqkRbuf = numpy.empty(ncol * Gblksize)
    pqkIbuf = numpy.empty(ncol * Gblksize)
    buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
    log.alldebug2(' blksize (%d,%d)', Gblksize, ncol)

    # NOTE(review): the same shell slice is used for both 's2' and 's1'.
    shls_slice = (sh0, sh1, 0, cell.nbas)
    for p0, p1 in lib.prange(0, ngs, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf)
        nG = p1 - p0
        for k, ji in enumerate(adapted_ji_idx):
            aoao = dat[k].reshape(nG, ncol)
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # presumably lib.dot(a, b, alpha, c, beta) updates c in place
            # as alpha*a.dot(b) + beta*c -- TODO confirm.  Under that
            # reading rows [naux:] lose conj(Gaux).T . aoao.
            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
            if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

    for k, idx in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)
        # 'CD' tag: j2c is used as a lower-triangular factor; otherwise it
        # is applied as a plain matrix.
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
        else:
            v = lib.dot(j2c, v)
        feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Finalize feri['j3c/<ji>'] for every pair mapped to one unique kpt.

    The metric feri['j2c/<uniq_kptji_id>'] is eigen-decomposed, eigenvalues
    not above mydf.linear_dep_threshold are dropped, and the stored
    integrals are corrected with the Fourier-space term, contracted with
    the decomposed metric and written back.  Afterwards the metric dataset
    is deleted and each 'j3c' dataset is replaced by its first naux0 rows.

    Relies on names from the enclosing scope (uniq_kpts, uniq_inverse,
    kptjs, feri, fuse, Gv, gxyz, Gvbase, b, gs, ngs, nao, naux, cell,
    fused_cell, mydf, log).
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # All (kpti, kptj) pairs whose difference maps onto this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # FT over all shells of fused_cell, fused and weighted by the Coulomb
    # kernel, then split into real/imaginary parts.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, gs)
    kLR = Gaux.T.real.copy('C')
    kLI = Gaux.T.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
    # Note large difference may be found in results between the CD/eig treatments.
    # In some systems, small integral errors can lead to different treatments of
    # linear dependency which can be observed in the total energy/orbital energy
    # around 4th decimal place.
    # A Cholesky ('CD') attempt used to precede this; it was abandoned for
    # better numerical stability, so j2ctag is always 'eig' here.
    w, v = scipy.linalg.eigh(j2c)
    log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns', uniq_kptji_id, w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold))
    # Keep eigenvectors above the threshold, scaled by 1/sqrt(eigenvalue).
    v = v[:, w > mydf.linear_dep_threshold].T.conj()
    v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
    j2c = v
    j2ctag = 'eig'
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Largest third entry (column count) over all shell ranges.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs, 16384)
    # Scratch space reused for every grid block and every k-point.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        # Advance the running column window [col0:col1] of the datasets.
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            # No imaginary part is kept when kpt is zero and kptj is gamma.
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # presumably lib.dot(a, b, alpha, c, beta) updates c in
                # place as alpha*a.dot(b) + beta*c -- TODO confirm.  Under
                # that reading j3c loses conj(Gaux).T . aoao.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            # j2ctag is always 'eig' above, so the 'CD' branch is not
            # reached in this version.
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d' % ji][:naux0, col0:col1] = v

    del (feri['j2c/%d' % uniq_kptji_id])
    # Replace each dataset by its first naux0 rows (the rows written above).
    for k, ji in enumerate(adapted_ji_idx):
        v = feri['j3c/%d' % ji][:naux0]
        del (feri['j3c/%d' % ji])
        feri['j3c/%d' % ji] = v
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
    '''Apply the Fourier-space correction to one chunk and contract it.

    Reads fswap['j3c-chunks/<job_id>/<idx>'] for every k-point pair mapped
    to uniq_kptji_id, fuses it, subtracts the plane-wave contribution for
    the AO shells [sh0, sh1), contracts with fswap['j2c/<uniq_kptji_id>']
    and writes the first naux0 rows back.  When fswap holds a
    'j2c-/<uniq_kptji_id>' entry, its product is stored under
    'j3c-/<job_id>/<idx>'.

    Relies on names from the enclosing scope (uniq_kpts, uniq_inverse,
    kptjs, fswap, j2ctags, fuse, ao_loc, Gv, gxyz, Gvbase, b, mesh, ngrids,
    max_memory, cell, fused_cell, mydf, log).
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    # All (kpti, kptj) pairs whose difference maps onto this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)

    # FT over all shells of fused_cell, fused and weighted by the Coulomb
    # kernel, then split into real/imaginary parts.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, mesh)
    kLR = lib.transpose(numpy.asarray(Gaux.real, order='C'))
    kLI = lib.transpose(numpy.asarray(Gaux.imag, order='C'))
    j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
    j2ctag = j2ctags[uniq_kptji_id]
    naux0 = j2c.shape[0]
    # Optional second metric factor; only present for some systems.
    if ('j2c-/%d' % uniq_kptji_id) in fswap:
        j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id])
    else:
        j2c_negative = None

    if is_zero(kpt):
        aosym = 's2'
    else:
        aosym = 's1'

    # vbar/ovlp are only defined (and only used below) for kpt == 0 in 3D.
    if aosym == 's2' and cell.dimension == 3:
        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
        ovlp = [lib.pack_tril(s) for s in ovlp]

    j3cR = [None] * nkptj
    j3cI = [None] * nkptj
    i0 = ao_loc[sh0]
    i1 = ao_loc[sh1]
    for k, idx in enumerate(adapted_ji_idx):
        key = 'j3c-chunks/%d/%d' % (job_id, idx)
        v = fuse(numpy.asarray(fswap[key]))
        if aosym == 's2' and cell.dimension == 3:
            # Subtract vbar[i] * overlap over the packed lower-triangular
            # range that corresponds to AO rows [i0, i1).
            for i in numpy.where(vbar != 0)[0]:
                v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
        j3cR[k] = numpy.asarray(v.real, order='C')
        # j3cI[k] stays None when the stored chunk is not complex128.
        if v.dtype == numpy.complex128:
            j3cI[k] = numpy.asarray(v.imag, order='C')
        v = None

    ncol = j3cR[0].shape[1]
    Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1)))  # +1 for pqkRbuf/pqkIbuf
    Gblksize = min(Gblksize, ngrids, 16384)
    # Scratch space reused for every grid block and every k-point.
    pqkRbuf = numpy.empty(ncol*Gblksize)
    pqkIbuf = numpy.empty(ncol*Gblksize)
    buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128)
    log.alldebug2(' blksize (%d,%d)', Gblksize, ncol)

    if aosym == 's2':
        shls_slice = (sh0, sh1, 0, sh1)
    else:
        shls_slice = (sh0, sh1, 0, cell.nbas)
    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf)
        nG = p1 - p0
        for k, ji in enumerate(adapted_ji_idx):
            aoao = dat[k].reshape(nG,ncol)
            pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # presumably lib.dot(a, b, alpha, c, beta) updates c in place
            # as alpha*a.dot(b) + beta*c -- TODO confirm.  Under that
            # reading j3c loses conj(Gaux).T . aoao.
            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
            if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

    for k, idx in enumerate(adapted_ji_idx):
        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
            v = j3cR[k]
        else:
            v = j3cR[k] + j3cI[k] * 1j
        # 'CD' tag: j2c is used as a lower-triangular factor; otherwise it
        # is applied as a plain matrix.
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v
        else:
            fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v)

        # low-dimension systems
        if j2c_negative is not None:
            fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Finalize feri['j3c/<ji>'] for every pair mapped to one unique kpt.

    The metric feri['j2c/<uniq_kptji_id>'] is Cholesky-decomposed; if that
    raises LinAlgError it is eigen-decomposed instead and eigenvalues not
    above mydf.linear_dep_threshold are dropped.  The stored integrals are
    corrected with the Fourier-space term, fused, contracted with the
    decomposed metric and written back.  Afterwards the metric dataset is
    deleted and each 'j3c' dataset is replaced by its first naux0 rows.

    Relies on names from the enclosing scope (uniq_kpts, uniq_inverse,
    kptjs, feri, fuse, Gv, gxyz, Gvbase, b, gs, ngs, nao, naux, cell,
    auxcell, fused_cell, mydf, log).
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # All (kpti, kptj) pairs whose difference maps onto this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # FT of the shells [auxcell.nbas, fused_cell.nbas) of fused_cell,
    # weighted by the Coulomb kernel and split into real/imaginary parts.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
    try:
        j2c = scipy.linalg.cholesky(j2c, lower=True)
        j2ctag = 'CD'
    except scipy.linalg.LinAlgError as e:
        # An earlier variant logged "J-metric not positive definite. It is
        # likely that gs is not enough." and re-raised here; the code now
        # falls back to an eigen-decomposition instead.
        w, v = scipy.linalg.eigh(j2c)
        log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
        log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold))
        # Keep eigenvectors above the threshold, scaled by 1/sqrt(eigenvalue).
        v = v[:, w > mydf.linear_dep_threshold].T.conj()
        v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
        j2c = v
        j2ctag = 'eig'
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Largest third entry (column count) over all shell ranges.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs, 16384)
    # Scratch space reused for every grid block and every k-point.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        # Advance the running column window [col0:col1] of the datasets.
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            # No imaginary part is kept when kpt is zero and kptj is gamma.
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # presumably lib.dot(a, b, alpha, c, beta) updates c in
                # place as alpha*a.dot(b) + beta*c -- TODO confirm.  Under
                # that reading rows [naux:] lose conj(Gaux).T . aoao.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            # 'CD' tag: j2c is a lower-triangular Cholesky factor; otherwise
            # it holds the scaled eigenvectors and is applied as a matrix.
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d' % ji][:naux0, col0:col1] = v

    del (feri['j2c/%d' % uniq_kptji_id])
    # Replace each dataset by its first naux0 rows (the rows written above).
    for k, ji in enumerate(adapted_ji_idx):
        v = feri['j3c/%d' % ji][:naux0]
        del (feri['j3c/%d' % ji])
        feri['j3c/%d' % ji] = v
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    '''Build the distributed 3-index tensor and attach it as mydf.j3c.

    Three stages, each distributed over processes with static_partition:
      1. raw 'int3c2e' integrals over the fused auxiliary cell are written
         into the ctf tensor j3c_junk;
      2. the 2-index metric for every unique kptj - kpti is corrected with
         its Fourier-space part, fused and written into the ctf tensor j2c;
      3. for every k-point pair the metric is decomposed, the raw integrals
         are corrected, fused and contracted with it, and the result is
         written into the ctf tensor j3c.

    Processes that have fewer tasks than the maximum still issue empty
    read/write calls so that every process makes the same number of ctf
    calls.

    Args:
        mydf: density-fitting object; reads max_memory, stdout, verbose,
            mesh, kpts, linear_dep_threshold, weighted_coulG and auxbar.
            mydf.kptij_lst and mydf.j3c are assigned.
        cell: the cell providing AO integrals.
        auxcell: the auxiliary cell.
        kptij_lst: array of (kpti, kptj) pairs, indexed as kptij_lst[:,0]
            and kptij_lst[:,1].

    Returns:
        None
    '''
    # time.clock() was removed in Python 3.8; prefer time.process_time and
    # only fall back to time.clock on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None)
    if cpu_clock is None:
        cpu_clock = time.clock

    max_memory = max(2000, mydf.max_memory-pyscflib.current_memory()[0])
    fused_cell, fuse = df.df.fuse_auxcell(mydf, auxcell)
    log = Logger(mydf.stdout, mydf.verbose)
    nao, nfao = cell.nao_nr(), fused_cell.nao_nr()

    # ---- stage 1: raw 3-index integrals --------------------------------
    jobs = np.arange(fused_cell.nbas)
    tasks = list(static_partition(jobs))
    # Collective call; kept although the value is recomputed below.
    ntasks = max(comm.allgather(len(tasks)))
    j3c_junk = ctf.zeros([len(kptij_lst), nao**2, nfao], dtype=np.complex128)
    t1 = (cpu_clock(), time.time())

    idx_full = np.arange(j3c_junk.size).reshape(j3c_junk.shape)
    if len(tasks) > 0:
        q0, q1 = tasks[0], tasks[-1] + 1
        shls_slice = (0, cell.nbas, 0, cell.nbas, q0, q1)
        bstart, bend = fused_cell.ao_loc_nr()[q0], fused_cell.ao_loc_nr()[q1]
        idx = idx_full[:,:,bstart:bend].ravel()
        tmp = df.incore.aux_e2(cell, fused_cell, intor='int3c2e', aosym='s2',
                               kptij_lst=kptij_lst, shls_slice=shls_slice)
        nao_pair = nao**2
        # Expand packed-triangular AO pairs to the full nao**2 layout.
        if tmp.shape[-2] != nao_pair and tmp.ndim == 2:
            tmp = pyscflib.unpack_tril(tmp, axis=0).reshape(nao_pair,-1)
        j3c_junk.write(idx, tmp.ravel())
    else:
        j3c_junk.write([],[])

    t1 = log.timer('j3c_junk', *t1)

    # ---- stage 2: 2-index metric per unique kptj - kpti -----------------
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = pyscflib.cartesian_prod([np.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    mydf.kptij_lst = kptij_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    jobs = np.arange(len(uniq_kpts))
    tasks = list(static_partition(jobs))
    ntasks = max(comm.allgather(len(tasks)))

    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    j2c = ctf.zeros([len(uniq_kpts),naux,naux], dtype=np.complex128)

    a = cell.lattice_vectors() / (2*np.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = np.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = np.rint(kdif)
        mask = np.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = np.where(mask)[0]
        return uniq_kptji_ids

    def cholesky_decomposed_metric(j2c_kptij):
        '''Return (decomposed metric, negative part or None, tag).

        The tag is 'CD' for a Cholesky factor and 'eig' when the Cholesky
        decomposition raised LinAlgError and eigenvectors are used instead.
        '''
        j2c_negative = None
        try:
            j2c_kptij = scipy.linalg.cholesky(j2c_kptij, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            w, v = scipy.linalg.eigh(j2c_kptij)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], np.count_nonzero(w<mydf.linear_dep_threshold))
            # Rows for dropped eigenvalues stay zero so the shape is kept.
            v1 = np.zeros(v.T.shape, dtype=v.dtype)
            v1[w>mydf.linear_dep_threshold,:] = v[:,w>mydf.linear_dep_threshold].conj().T
            v1[w>mydf.linear_dep_threshold,:] /= np.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c_kptij = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = np.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = np.zeros(v1.shape, dtype=v1.dtype)
                    j2c_negative[idx,:] = (v[:,idx]/np.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c_kptij, j2c_negative, j2ctag

    for itask in range(ntasks):
        if itask >= len(tasks):
            # No work left on this process: empty write only.
            j2c.write([],[])
            continue
        k = tasks[itask]
        kpt = uniq_kpts[k]
        j2ctmp = np.asarray(fused_cell.pbc_intor('int2c2e', hermi=1, kpts=kpt))
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in pyscflib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            if is_zero(kpt):
                j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T).real
                j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T
            else:
                j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T)
                j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T.conj()

        tmp = fuse(fuse(j2ctmp).T).T
        idx = k * naux**2 + np.arange(naux**2)
        j2c.write(idx, tmp.ravel())
        j2ctmp = tmp = None

    coulG = None
    t1 = log.timer('j2c', *t1)

    # ---- stage 3: contracted 3-index tensor per k-point pair ------------
    j3c = ctf.zeros([len(kpt_ji),nao,nao,naux], dtype=np.complex128)
    jobs = np.arange(len(kpt_ji))
    tasks = list(static_partition(jobs))
    ntasks = max(comm.allgather(len(tasks)))

    for itask in range(ntasks):
        if itask >= len(tasks):
            # No work left on this process: empty reads/write only.
            j2c_ji = j2c.read([])
            j3ctmp = j3c_junk.read([])
            j3c.write([],[])
            continue
        idx_ji = tasks[itask]
        kpti, kptj = kptij_lst[idx_ji]
        # NOTE(review): idxi/idxj are not used below; the call is kept
        # unchanged.
        idxi, idxj = member(kpti, mydf.kpts), member(kptj, mydf.kpts)
        uniq_idx = uniq_inverse[idx_ji]
        kpt = uniq_kpts[uniq_idx]

        # Pick the lowest-index stored metric related to this kpt by
        # momentum conservation; conj is True when that index comes from
        # the kconserve_indices(kpt) list rather than (-kpt).
        id_eq = kconserve_indices(-kpt)
        id_conj = kconserve_indices(kpt)
        id_conj = np.asarray([i for i in id_conj if i not in id_eq], dtype=int)
        id_full = np.hstack((id_eq, id_conj))
        map_id, conj = min(id_full), np.argmin(id_full) >=len(id_eq)
        j2cidx = map_id * naux**2 + np.arange(naux**2)
        j2c_ji = j2c.read(j2cidx).reshape(naux, naux) # read to be added
        j2c_ji, j2c_negative, j2ctag = cholesky_decomposed_metric(j2c_ji)
        if conj:
            j2c_ji = j2c_ji.conj()

        shls_slice= (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)

        j3c_id = idx_ji * nao**2*nfao + np.arange(nao**2*nfao)
        j3ctmp = j3c_junk.read(j3c_id).reshape(nao**2, fused_cell.nao_nr()).T

        if is_zero(kpt):  # kpti == kptj
            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj)
                for i in np.where(vbar != 0)[0]:
                    j3ctmp[i] -= vbar[i] * ovlp.reshape(-1)

        aoao = ft_ao._ft_aopair_kpts(cell, Gv, None, 's1', b, gxyz, Gvbase, kpt, kptj)[0].reshape(len(Gv),-1)
        j3ctmp[naux:] -= np.dot(Gaux.T.conj(), aoao)
        j3ctmp = fuse(j3ctmp)
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c_ji, j3ctmp, lower=True, overwrite_b=True)
        else:
            v = np.dot(j2c_ji, j3ctmp)
        v = v.T.reshape(nao,nao,naux)
        j3c_id = idx_ji * nao**2*naux + np.arange(nao**2*naux)
        j3c.write(j3c_id, v.ravel())

    mydf.j3c = j3c
    return None
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
    """Finish the j3c chunks of one job for one unique k-point difference.

    For every (kpti, kptj) pair mapped to ``uniq_kptji_id`` the chunk stored
    under ``fswap['j3c-chunks/<job_id>/<pair>']`` is loaded, the product of
    the weighted auxiliary ``ft_ao.ft_ao`` values and the
    ``ft_ao._ft_aopair_kpts`` values is subtracted from its rows
    ``[naux:]``, the result is passed through ``fuse`` and finally
    contracted with the decomposed metric ``fswap['j2c/<uniq_kptji_id>']``.
    The outcome overwrites the first ``naux0`` rows of the same dataset.

    Args:
        job_id: index used to build the ``fswap`` dataset keys.
        uniq_kptji_id: index into ``uniq_kpts`` / ``j2ctags``.
        sh0, sh1: shell range; used for ``shls_slice`` and, through
            ``ao_loc``, for slicing the packed overlap.

    Returns:
        None.  All results are written to ``fswap``.
    """
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    ji_ids = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    kptj_group = kptjs[ji_ids]
    nkptj = len(kptj_group)

    # Decomposed 2-center metric and the tag telling how it was decomposed.
    metric = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
    metric_tag = j2ctags[uniq_kptji_id]
    naux0 = metric.shape[0]
    neg_key = 'j2c-/%d' % uniq_kptji_id
    metric_neg = numpy.asarray(fswap[neg_key]) if neg_key in fswap else None

    kpt_is_zero = is_zero(kpt)
    aosym = 's2' if kpt_is_zero else 's1'
    remove_vbar = aosym == 's2' and cell.dimension == 3
    # Pairs for which only the real part is accumulated and fused.
    real_only = [kpt_is_zero and gamma_point(kj) for kj in kptj_group]

    if remove_vbar:
        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj_group)
        ovlp = [lib.pack_tril(s) for s in ovlp]
        vbar_rows = numpy.where(vbar != 0)[0]

    i0 = ao_loc[sh0]
    i1 = ao_loc[sh1]
    # Range of the packed lower-triangular overlap covered by [sh0, sh1).
    tril_range = slice(i0 * (i0 + 1) // 2, i1 * (i1 + 1) // 2)

    # Load every chunk and split it into C-ordered real / imaginary parts.
    j3cR = [None] * nkptj
    j3cI = [None] * nkptj
    for k, idx in enumerate(ji_ids):
        chunk = numpy.asarray(fswap['j3c-chunks/%d/%d' % (job_id, idx)])
        if remove_vbar:
            packed_ovlp = ovlp[k][tril_range].ravel()
            for row in vbar_rows:
                chunk[row] -= vbar[row] * packed_ovlp
        j3cR[k] = numpy.asarray(chunk.real, order='C')
        if chunk.dtype == numpy.complex128:
            j3cI[k] = numpy.asarray(chunk.imag, order='C')
        chunk = None

    ncol = j3cR[0].shape[1]
    # +1 for pqkRbuf/pqkIbuf
    Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    pqkRbuf = numpy.empty(ncol * Gblksize)
    pqkIbuf = numpy.empty(ncol * Gblksize)
    ft_buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
    log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol)

    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    aux_shls = (auxcell.nbas, fused_cell.nbas)
    last_shell = sh1 if aosym == 's2' else cell.nbas
    pair_shls = (sh0, sh1, 0, last_shell)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        nG = p1 - p0
        Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], aux_shls, b,
                           gxyz[p0:p1], Gvbase, kpt)
        Gaux *= wcoulG[p0:p1, None]
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], pair_shls, aosym, b,
                                    gxyz[p0:p1], Gvbase, kpt, kptj_group,
                                    out=ft_buf)
        # Views on the preallocated scratch buffers, sized for this G block.
        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
        for k, _ in enumerate(ji_ids):
            aoao = dat[k].reshape(nG, ncol)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # lib.dot(a, b, alpha, c, beta) is used here to accumulate
            # alpha * a.dot(b) into the rows [naux:] of the chunk.
            lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
            if not real_only[k]:
                lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
        kLR = kLI = None

    for k, idx in enumerate(ji_ids):
        if real_only[k]:
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)

        if metric_tag == 'CD':
            # Metric is a lower-triangular factor: solve instead of multiply.
            v = scipy.linalg.solve_triangular(metric, v, lower=True,
                                              overwrite_b=True)
            contracted = v
        else:
            contracted = lib.dot(metric, v)
        fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = contracted

        # low-dimension systems
        if metric_neg is not None:
            fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(metric_neg, v)
def pw_loop(self, cell, gs=None, kpti_kptj=None, shls_slice=None, max_memory=2000):
    """Generate the plane-wave part block by block.

    Args:
        cell: cell object; ``cell.nao_nr()`` and ``cell._h`` are read.
        gs: three grid half-sizes; defaults to ``self.gs``.
        kpti_kptj: pair ``(kpti, kptj)``; both default to ``numpy.zeros(3)``.
        shls_slice: forwarded unchanged to ``ft_ao._ft_aopair_kpts``.
        max_memory: memory budget used to choose the G-vector block size.

    Yields:
        Tuples ``(pqkR, pqkI, g0, g1)``: the real and imaginary parts of the
        ``ft_ao._ft_aopair_kpts`` result, laid out as ``(nao*nao, g1-g0)``,
        for the plane-wave indices ``g0:g1``.  Both arrays are views on
        scratch buffers that are overwritten by the next iteration.
    """
    gs = self.gs if gs is None else gs
    if kpti_kptj is not None:
        kpti, kptj = kpti_kptj
    else:
        kpti = kptj = numpy.zeros(3)

    nao = cell.nao_nr()
    # Integer grid indices per axis, ordered 0..g followed by -g..-1.
    grid_axes = tuple(numpy.append(range(gs[ax] + 1), range(-gs[ax], 0))
                      for ax in range(3))
    gxyz = lib.cartesian_prod(grid_axes)
    invh = numpy.linalg.inv(cell._h)
    Gv = 2 * numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    # Theoretically, hermitian symmetry can be also found for kpti == kptj:
    #       f_ji(G) = \int f_ji exp(-iGr) = \int f_ij^* exp(-iGr) = [f_ij(-G)]^*
    # The hermi operation needs reordering the axis-0.  It is inefficient
    both_gamma = gamma_point(kpti) and gamma_point(kptj)
    aosym = 's1hermi' if both_gamma else 's1'

    npair = nao * nao
    mem_limited = int(max_memory * 1e6 * .75 / 16 / nao**2)
    blksize = min(max(16, mem_limited), 16384)
    sublk = max(16, int(blksize // 4))
    buf = [numpy.zeros(npair * blksize, dtype=numpy.complex128)]
    pqkRbuf = numpy.empty(npair * sublk)
    pqkIbuf = numpy.empty(npair * sublk)

    for p0, p1 in self.prange(0, ngs, blksize):
        aoao = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh,
                                     gxyz[p0:p1], gs, kptj - kpti,
                                     kptj.reshape(1, 3), out=buf)[0]
        for i0, i1 in lib.prange(0, p1 - p0, sublk):
            nG = i1 - i0
            piece = aoao[i0:i1]
            pqkR = numpy.ndarray((nao, nao, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((nao, nao, nG), buffer=pqkIbuf)
            # Move the G axis last so each AO pair becomes one row.
            pqkR[:] = piece.real.transpose(1, 2, 0)
            pqkI[:] = piece.imag.transpose(1, 2, 0)
            yield (pqkR.reshape(-1, nG), pqkI.reshape(-1, nG),
                   p0 + i0, p0 + i1)
        # Clear the block in place before the next G block is computed.
        aoao[:] = 0