def vppnl_by_k(kpt): Gk = Gv + kpt G_rad = lib.norm(Gk, axis=1) aokG = ft_ao.ft_ao(cell, Gv, kpt=kpt) * (ngs / cell.vol) vppnl = 0 for ia in range(cell.natm): symb = cell.atom_symbol(ia) if symb not in cell._pseudo: continue pp = cell._pseudo[symb] for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: hl = numpy.asarray(hl) fakemol._bas[0, gto.ANG_OF] = l fakemol._env[ptr + 3] = 0.5 * rl ** 2 fakemol._env[ptr + 4] = rl ** (l + 1.5) * numpy.pi ** 1.25 pYlm_part = dft.numint.eval_ao(fakemol, Gk, deriv=0) pYlm = numpy.empty((nl, l * 2 + 1, ngs)) for k in range(nl): qkl = pseudo.pp._qli(G_rad * rl, l, k) pYlm[k] = pYlm_part.T * qkl # pYlm is real SPG_lmi = numpy.einsum("g,nmg->nmg", SI[ia].conj(), pYlm) SPG_lm_aoG = numpy.einsum("nmg,gp->nmp", SPG_lmi, aokG) tmp = numpy.einsum("ij,jmp->imp", hl, SPG_lm_aoG) vppnl += numpy.einsum("imp,imq->pq", SPG_lm_aoG.conj(), tmp) return vppnl * (1.0 / ngs ** 2)
def get_nuc_less_accurate(mydf, kpts=None): log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) nkpts = len(kpts_lst) if mydf._cderi is None: mydf.build() cell = mydf.cell fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell) nao = cell.nao_nr() charge = -cell.atom_charges() j2c = pgto.intor_cross("cint2c2e_sph", fused_cell, _fake_nuc(cell)) jaux = j2c.dot(charge) jaux -= charge.sum() * mydf.auxbar(fused_cell) Gv = cell.get_Gv(mydf.gs) SI = cell.get_SI(Gv) # The normal nuclues have been considered in function get_gth_vlocG_part1 # The result vG is the potential in G-space for erf part of the pp nuclues and # "numpy.dot(charge, SI) * coulG" for normal nuclues. vpplocG = pgto.pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vG = -1.0 / cell.vol * numpy.einsum("ij,ij->j", SI, vpplocG) kpt_allow = numpy.zeros(3) if is_zero(kpts_lst): vj = numpy.zeros((nkpts, nao ** 2)) else: vj = numpy.zeros((nkpts, nao ** 2), dtype=numpy.complex128) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for k, pqkR, pqkI, p0, p1 in mydf.ft_loop(cell, mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory): if not gamma_point(kpts_lst[k]): vj[k] += numpy.einsum("k,xk->x", vG.real, pqkI) * 1j vj[k] += numpy.einsum("k,xk->x", vG.imag, pqkR) * -1j vj[k] += numpy.einsum("k,xk->x", vG.real, pqkR) vj[k] += numpy.einsum("k,xk->x", vG.imag, pqkI) pqkR = pqkI = None Gv = cell.get_Gv(mydf.gs) aoaux = ft_ao.ft_ao(fused_cell, Gv) jaux -= numpy.einsum("x,xj->j", vG.real, aoaux.real) jaux -= numpy.einsum("x,xj->j", vG.imag, aoaux.imag) jaux = fuse(jaux) vj = vj.reshape(-1, nao, nao) for k, kpt in enumerate(kpts_lst): with mydf.load_Lpq((kpt, kpt)) as Lpq: v = 0 for p0, p1 in lib.prange(0, jaux.size, mydf.blockdim): v += numpy.dot(jaux[p0:p1], numpy.asarray(Lpq[p0:p1])) if gamma_point(kpt): vj[k] += lib.unpack_tril(numpy.asarray(v.real, order="C")) else: vj[k] += lib.unpack_tril(v) if kpts is None or numpy.shape(kpts) == (3,): vj = vj[0] return vj
def test_ft_ao(self): coords = pdft.gen_grid.gen_uniform_grids(cell) aoR = pdft.numint.eval_ao(cell, coords) ngs, nao = aoR.shape ref = numpy.asarray([tools.fft(aoR[:,i], cell.gs) for i in range(nao)]) ref = ref.T * (cell.vol/ngs) dat = ft_ao.ft_ao(cell, cell.Gv) self.assertAlmostEqual(numpy.linalg.norm(ref[:,0]-dat[:,0]) , 8.4358614794095722e-11, 9) self.assertAlmostEqual(numpy.linalg.norm(ref[:,1]-dat[:,1]) , 0.0041669297531642616 , 4) self.assertAlmostEqual(numpy.linalg.norm(ref[:,2:]-dat[:,2:]), 5.8677286005879366e-14, 9)
def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1]) if SI_on_z.size > 0: for k, aoao in enumerate(dat): aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k]) aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T) vG_mod = numpy.einsum('ig,g,g->i', aux.conj(), wcoulG[p0:p1][G0idx], SI_on_z) if gamma_point(adapted_kptjs[k]): j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k] else: tmp = vG_mod[:,None] * ovlp[k] j3cR[k][:naux] -= tmp.real j3cI[k][:naux] -= tmp.imag tmp = aux = vG_mod nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d/%d'%(ji,istep)] = v
def test_ft_ao_with_kpts(self): numpy.random.seed(1) kpt = numpy.random.random(3) coords = pdft.gen_grid.gen_uniform_grids(cell) aoR = pdft.numint.eval_ao(cell, coords, kpt=kpt) ngs, nao = aoR.shape expmikr = numpy.exp(-1j*numpy.dot(coords,kpt)) ref = numpy.asarray([tools.fftk(aoR[:,i], cell.gs, expmikr) for i in range(nao)]) ref = ref.T * (cell.vol/ngs) dat = ft_ao.ft_ao(cell, cell.Gv, kpt=kpt) self.assertAlmostEqual(numpy.linalg.norm(ref[:,0]-dat[:,0]) , 1.3359899490499813e-10, 9) self.assertAlmostEqual(numpy.linalg.norm(ref[:,1]-dat[:,1]) , 0.0042404556036939756 , 4) self.assertAlmostEqual(numpy.linalg.norm(ref[:,2:]-dat[:,2:]), 4.8856357999633564e-14, 9)
def vppnl_by_k(kpt): Gk = Gv + kpt G_rad = lib.norm(Gk, axis=1) aokG = ft_ao.ft_ao(cell, Gv, kpt=kpt) * (ngs / cell.vol) vppnl = 0 for ia in range(cell.natm): symb = cell.atom_symbol(ia) if symb not in cell._pseudo: continue pp = cell._pseudo[symb] p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: fakemol._bas[0, gto.ANG_OF] = l fakemol._env[ptr + 3] = .5 * rl**2 fakemol._env[ptr + 4] = rl**(l + 1.5) * numpy.pi**1.25 pYlm_part = dft.numint.eval_ao(fakemol, Gk, deriv=0) p0, p1 = p1, p1 + nl * (l * 2 + 1) # pYlm is real, SI[ia] is complex pYlm = numpy.ndarray((nl, l * 2 + 1, ngs), dtype=numpy.complex128, buffer=buf[p0:p1]) for k in range(nl): qkl = pseudo.pp._qli(G_rad * rl, l, k) pYlm[k] = pYlm_part.T * qkl #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm) #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG) #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) if p1 > 0: SPG_lmi = buf[:p1] SPG_lmi *= SI[ia].conj() SPG_lm_aoGs = lib.zdot(SPG_lmi, aokG) p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: p0, p1 = p1, p1 + nl * (l * 2 + 1) hl = numpy.asarray(hl) SPG_lm_aoG = SPG_lm_aoGs[p0:p1].reshape( nl, l * 2 + 1, -1) tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) return vppnl * (1. / ngs**2)
def get_pnucp(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh) charge = -cell.atom_charges() kpt_allow = numpy.zeros(3) coulG = tools.get_coulG(cell, kpt_allow, mesh=mydf.mesh, Gv=Gv) coulG *= kws if mydf.eta == 0: wj = numpy.zeros((nkpts,nao_pair), dtype=numpy.complex128) SI = cell.get_SI(Gv) vG = numpy.einsum('i,ix->x', charge, SI) * coulG wj = numpy.zeros((nkpts,nao_pair), dtype=numpy.complex128) else: nuccell = copy.copy(cell) half_sph_norm = .5/numpy.sqrt(numpy.pi) norm = half_sph_norm/mole.gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvp1')) t1 = log.timer_debug1('pnucp pass1: analytic int', *t1) aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG if cell.dimension == 3: nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)]) nucbar *= numpy.pi/cell.vol ovlp = cell.pbc_intor('int1e_kin', 1, lib.HERMITIAN, kpts_lst) for k in range(nkpts): s = lib.pack_tril(ovlp[k]) # *2 due to the factor 1/2 in T wj[k] -= nucbar*2 * s max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2', intor='GTO_ft_pdotp'): for k, aoao in enumerate(aoaoks): if aft_jk.gamma_point(kpts_lst[k]): wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting pnucp', *t1) wj_kpts = [] for k, kpt in enumerate(kpts_lst): if aft_jk.gamma_point(kpt): wj_kpts.append(lib.unpack_tril(wj[k].real.copy())) else: wj_kpts.append(lib.unpack_tril(wj[k])) if kpts is None or numpy.shape(kpts) == (3,): wj_kpts = wj_kpts[0] return numpy.asarray(wj_kpts)
def get_pp_loc_part1(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) log = logger.Logger(mydf.stdout, mydf.verbose) t0 = t1 = (time.clock(), time.time()) mesh = numpy.asarray(mydf.mesh) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 charges = cell.atom_charges() kpt_allow = numpy.zeros(3) if mydf.eta == 0: if cell.dimension > 0: ke_guess = estimate_ke_cutoff(cell, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any( mesh[:cell.dimension] < mesh_guess[:cell.dimension] * .8): logger.warn( mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG) vpplocG *= kws vG = vpplocG vj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128) else: if cell.dimension > 0: ke_guess = estimate_ke_cutoff_for_eta(cell, mydf.eta, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any(mesh < mesh_guess * .8): logger.warn( mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) mesh_min = numpy.min((mesh_guess, mesh), axis=0) if cell.dimension < 2 or cell.low_dim_ft_type == 'inf_vacuum': mesh[:cell.dimension] = mesh_min[:cell.dimension] else: mesh = mesh_min Gv, Gvbase, kws = cell.get_Gv_weights(mesh) nuccell = _compensate_nuccell(mydf) # PP-loc part1 is handled by fakenuc in _int_nuc_vloc vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst)) t0 = t1 = log.timer_debug1('vnuc pass1: analytic int', *t0) coulG = tools.get_coulG(cell, kpt_allow, mesh=mesh, Gv=Gv) * kws aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', -charges, aoaux) * coulG max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): for k, aoao in enumerate(aoaoks): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting Vnuc [%s:%s]' % (p0, p1), *t1) log.timer_debug1('contracting Vnuc', *t0) vj_kpts = [] for k, kpt in enumerate(kpts_lst): if gamma_point(kpt): vj_kpts.append(lib.unpack_tril(vj[k].real.copy())) else: vj_kpts.append(lib.unpack_tril(vj[k])) if kpts is None or numpy.shape(kpts) == (3, ): vj_kpts = vj_kpts[0] return numpy.asarray(vj_kpts)
def _make_j3c(mydf, cell, auxcell, kptij_lst): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell) if mydf.metric.upper() != 'J': outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs Gv, Gvbase, kws = cell.get_Gv_weights(gs) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngs = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts) kLRs = [] kLIs = [] for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T aoaux = fuse(aoaux) coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) kLR = (aoaux.real * coulG).T kLI = (aoaux.imag * coulG).T if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T) if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T) j2c[k] = fuse(fuse(j2c[k]).T).T.copy() if is_zero(kpt): # kpti == kptj j2c[k] -= lib.dot(kLR.T, kLR) j2c[k] -= lib.dot(kLI.T, kLI) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI) j2c[k] -= j2cR + j2cI * 1j kLR *= coulG.reshape(-1,1) kLI *= coulG.reshape(-1,1) kLRs.append(kLR) kLIs.append(kLI) aoaux = kLR = kLI = j2cR = j2cI = coulG = None feri = h5py.File(mydf._cderi) log.debug2('memory = %s', lib.current_memory()[0]) # Expand approx Lpq for aosym='s1'. The approx Lpq are all in aosym='s2' mode if mydf.approx_sr_level > 0 and len(kptij_lst) > 1: Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao) def save(label, dat, col0, col1): nrow = dat.shape[0] feri[label][:nrow,col0:col1] = dat def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) kLR = kLRs[uniq_kptji_id] kLI = kLIs[uniq_kptji_id] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = fuse(numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])) if mydf.approx_sr_level == 0: Lpq = numpy.asarray(feri['Lpq/%d'%idx][:,col0:col1]) elif aosym == 's2': Lpq = numpy.asarray(feri['Lpq/0'][:,col0:col1]) else: Lpq = numpy.asarray(Lpq_fake[:,col0:col1]) lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = Lpq = None log.debug3(' istep, k = %d %d memory = %s', istep, k, lib.current_memory()[0]) if aosym == 's2': shls_slice = (bstart, bend, 0, bend) for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T aoao[:] = 0 lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) log.debug3(' p0:p1 = %d:%d memory = %s', p0, p1, lib.current_memory()[0]) else: shls_slice = (bstart, bend, 0, cell.nbas) ni = ncol // nao for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.transpose(1,2,0) pqkI[:] = aoao.imag.transpose(1,2,0) aoao[:] = 0 pqkR = pqkR.reshape(-1,nG) pqkI = pqkI.reshape(-1,nG) zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k], j3cI[k], 1) log.debug3(' p0:p1 = %d:%d memory = %s', p0, p1, lib.current_memory()[0]) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): save('j3c/%d'%ji, j3cR[k], col0, col1) else: save('j3c/%d'%ji, j3cR[k]+j3cI[k]*1j, col0, col1) for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def get_nuc(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs) kpt_allow = numpy.zeros(3) if mydf.eta == 0: vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG) vpplocG *= kws vGR = vpplocG.real vGI = vpplocG.imag vjR = numpy.zeros((nkpts,nao_pair)) vjI = numpy.zeros((nkpts,nao_pair)) else: nuccell = copy.copy(cell) half_sph_norm = .5/numpy.sqrt(numpy.pi) norm = half_sph_norm/gto.mole._gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) # PP-loc part1 is handled by fakenuc in _int_nuc_vloc vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst)) vjR = vj.real vjI = vj.imag t1 = log.timer_debug1('vnuc pass1: analytic int', *t1) charge = -cell.atom_charges() coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs, Gv=Gv) coulG *= kws aoaux = ft_ao.ft_ao(nuccell, Gv) vGR = numpy.einsum('i,xi->x', charge, aoaux.real) * coulG vGI = numpy.einsum('i,xi->x', charge, aoaux.imag) * coulG max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if not gamma_point(kpts_lst[k]): vjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI) vjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR) vjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR) vjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI) t1 = log.timer_debug1('contracting Vnuc', *t1) if mydf.eta != 0 and cell.dimension == 3: nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)]) nucbar *= numpy.pi/cell.vol ovlp = cell.pbc_intor('cint1e_ovlp_sph', 1, lib.HERMITIAN, kpts_lst) for k in range(nkpts): s = lib.pack_tril(ovlp[k]) vjR[k] -= nucbar * s.real vjI[k] -= nucbar * s.imag vj = [] for k, kpt in enumerate(kpts_lst): if gamma_point(kpt): vj.append(lib.unpack_tril(vjR[k])) else: vj.append(lib.unpack_tril(vjR[k]+vjI[k]*1j)) if kpts is None or numpy.shape(kpts) == (3,): vj = vj[0] return vj
def get_nuc(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs) kpt_allow = numpy.zeros(3) if mydf.eta == 0: vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG) vpplocG *= kws vG = vpplocG vj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128) else: nuccell = copy.copy(cell) half_sph_norm = .5 / numpy.sqrt(numpy.pi) norm = half_sph_norm / gto.mole._gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) # PP-loc part1 is handled by fakenuc in _int_nuc_vloc vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst)) t1 = log.timer_debug1('vnuc pass1: analytic int', *t1) charge = -cell.atom_charges() coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs, Gv=Gv) coulG *= kws aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): for k, aoao in enumerate(aoaoks): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting Vnuc', *t1) vj_kpts = [] for k, kpt in enumerate(kpts_lst): if gamma_point(kpt): vj_kpts.append(lib.unpack_tril(vj[k].real.copy())) else: vj_kpts.append(lib.unpack_tril(vj[k])) if kpts is None or numpy.shape(kpts) == (3, ): vj_kpts = vj_kpts[0] return numpy.asarray(vj_kpts)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) # Create swap file to avoid huge cderi_file. see also function # pyscf.pbc.df.df._make_j3c swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2', kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T aoaux = fuse(aoaux) coulG = mydf.weighted_coulG(kpt, False, mesh) LkR = numpy.asarray(aoaux.real, order='C') LkI = numpy.asarray(aoaux.imag, order='C') j2c_k = fuse(fuse(j2c[k]).T).T.copy() if is_zero(kpt): # kpti == kptj j2c_k -= lib.dot(LkR*coulG, LkR.T) j2c_k -= lib.dot(LkI*coulG, LkI.T) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2cR, j2cI = zdotCN(LkR*coulG, LkI*coulG, LkR.T, LkI.T) j2c_k -= j2cR + j2cI * 1j fswap['j2c/%d'%k] = j2c_k aoaux = LkR = LkI = j2cR = j2cI = coulG = None j2c = None def cholesky_decomposed_metric(uniq_kptji_id): j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) j2c_negative = None # Note large difference may be found in results between the CD/eig treatments. # In some systems, small integral errors can lead to different treatments of # linear dependency which can be observed in the total energy/orbital energy # around 4th decimal place. # try: # j2c = scipy.linalg.cholesky(j2c, lower=True) # j2ctag = 'CD' # except scipy.linalg.LinAlgError as e: # # Abandon CD treatment for better numerical stability w, v = scipy.linalg.eigh(j2c) log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns', uniq_kptji_id, w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold)) v1 = v[:,w>mydf.linear_dep_threshold].T.conj() v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) j2c = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = numpy.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T j2ctag = 'eig' return j2c, j2c_negative, j2ctag feri = h5py.File(cderi_file, 'w') feri['j3c-kptij'] = kptij_lst nsegs = len(fswap['j3c-junk/0']) def make_kpt(uniq_kptji_id, cholesky_j2c): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, mesh) kLR = Gaux.T.real.copy('C') kLI = Gaux.T.imag.copy('C') if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d'%(ji,istep)] = v else: feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v) with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)] v = fuse(numpy.vstack(v)) if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji]) # Wrapped around boundary and symmetry between k and -k can be used # explicitly for the metric integrals. We consider this symmetry # because it is used in the df_ao2mo module when contracting two 3-index # integral tensors to the 4-index 2e integral tensor. If the symmetry # related k-points are treated separately, the resultant 3-index tensors # may have inconsistent dimension due to the numerial noise when handling # linear dependency of j2c. def conj_j2c(cholesky_j2c): j2c, j2c_negative, j2ctag = cholesky_j2c if j2c_negative is None: return j2c.conj(), None, j2ctag else: return j2c.conj(), j2c_negative.conj(), j2ctag a = cell.lattice_vectors() / (2*numpy.pi) def kconserve_indices(kpt): '''search which (kpts+kpt) satisfies momentum conservation''' kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt) kdif_int = numpy.rint(kdif) mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL uniq_kptji_ids = numpy.where(mask)[0] return uniq_kptji_ids done = numpy.zeros(len(uniq_kpts), dtype=bool) for k, kpt in enumerate(uniq_kpts): if done[k]: continue log.debug1('Cholesky decomposition for j2c at kpt %s', k) cholesky_j2c = cholesky_decomposed_metric(k) # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the # symmetry S = S uniq_kptji_ids = kconserve_indices(-kpt) log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt) log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the # symmetry S = S* uniq_kptji_ids = kconserve_indices(kpt) log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt) log.debug1(" make_kpt for %s", uniq_kptji_ids) cholesky_j2c = conj_j2c(cholesky_j2c) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell) ao_loc = cell.ao_loc_nr() nao = ao_loc[-1] naux = auxcell.nao_nr() nkptij = len(kptij_lst) mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) j2ctags = [] t1 = log.timer_debug1('2c2e', *t1) swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None for k, kpt in enumerate(uniq_kpts): coulG = mydf.weighted_coulG(kpt, False, mesh) j2c[k] = fuse(fuse(j2c[k]).T).T.copy() j2c_k = numpy.zeros_like(j2c[k]) for p0, p1 in mydf.mpi_prange(0, ngrids): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T aoaux = fuse(aoaux) LkR = numpy.asarray(aoaux.real, order='C') LkI = numpy.asarray(aoaux.imag, order='C') aoaux = None if is_zero(kpt): # kpti == kptj j2cR = lib.dot(LkR*coulG[p0:p1], LkR.T) j2c_k += lib.dot(LkI*coulG[p0:p1], LkI.T, 1, j2cR, 1) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2cR, j2cI = zdotCN(LkR*coulG[p0:p1], LkI*coulG[p0:p1], LkR.T, LkI.T) j2c_k += j2cR + j2cI * 1j LkR = LkI = None j2c[k] -= mpi.allreduce(j2c_k) try: fswap['j2c/%d'%k] = scipy.linalg.cholesky(j2c[k], lower=True) j2ctags.append('CD') except scipy.linalg.LinAlgError: w, v = scipy.linalg.eigh(j2c[k]) log.debug2('metric linear dependency for kpt %s', k) log.debug2('cond = %.4g, drop %d bfns', w[0]/w[-1], numpy.count_nonzero(w<mydf.linear_dep_threshold)) v1 = v[:,w>mydf.linear_dep_threshold].T.conj() v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) fswap['j2c/%d'%k] = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = numpy.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: fswap['j2c-/%d'%k] = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T w = v = v1 = v2 = None j2ctags.append('eig') aoaux = kLR = kLI = j2cR = j2cI = coulG = None j2c = None aosym_s2 = numpy.einsum('ix->i', abs(kptis-kptjs)) < 1e-9 j_only = numpy.all(aosym_s2) if gamma_point(kptij_lst): dtype = 'f8' else: dtype = 'c16' t1 = log.timer_debug1('aoaux and int2c', *t1) # Estimates the buffer size based on the last contraction in G-space. # This contraction requires to hold nkptj copies of (naux,?) array # simultaneously in memory. mem_now = max(comm.allgather(lib.current_memory()[0])) max_memory = max(2000, mydf.max_memory - mem_now) nkptj_max = max((uniq_inverse==x).sum() for x in set(uniq_inverse)) buflen = max(int(min(max_memory*.5e6/16/naux/(nkptj_max+2)/nao, nao/3/mpi.pool.size)), 1) chunks = (buflen, nao) j3c_jobs = mpi_df.grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only) log.debug1('max_memory = %d MB (%d in use) chunks %s', max_memory, mem_now, chunks) log.debug2('j3c_jobs %s', j3c_jobs) if j_only: int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst) else: int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst) idxb = numpy.tril_indices(nao) idxb = (idxb[0] * nao + idxb[1]).astype('i') aux_loc = fused_cell.ao_loc_nr(fused_cell.cart) def gen_int3c(job_id, ish0, ish1): dataname = 'j3c-chunks/%d' % job_id i0 = ao_loc[ish0] i1 = ao_loc[ish1] dii = i1*(i1+1)//2 - i0*(i0+1)//2 dij = (i1 - i0) * nao if j_only: buflen = max(8, int(max_memory*1e6/16/(nkptij*dii+dii))) else: buflen = max(8, int(max_memory*1e6/16/(nkptij*dij+dij))) auxranges = balance_segs(aux_loc[1:]-aux_loc[:-1], buflen) buflen = max([x[2] for x in auxranges]) buf = numpy.empty(nkptij*dij*buflen, dtype=dtype) buf1 = numpy.empty(dij*buflen, dtype=dtype) naux = aux_loc[-1] for kpt_id, kptij in enumerate(kptij_lst): key = '%s/%d' % (dataname, kpt_id) if aosym_s2[kpt_id]: shape = (naux, dii) else: shape = (naux, dij) if gamma_point(kptij): fswap.create_dataset(key, shape, 'f8') else: fswap.create_dataset(key, shape, 'c16') naux0 = 0 for istep, auxrange in enumerate(auxranges): log.alldebug2("aux_e1 job_id %d step %d", job_id, istep) sh0, sh1, nrow = auxrange sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1) if j_only: mat = numpy.ndarray((nkptij,dii,nrow), dtype=dtype, buffer=buf) else: mat = numpy.ndarray((nkptij,dij,nrow), dtype=dtype, buffer=buf) mat = int3c(sub_slice, mat) for k, kptij in enumerate(kptij_lst): h5dat = fswap['%s/%d'%(dataname,k)] v = lib.transpose(mat[k], out=buf1) if not j_only and aosym_s2[k]: idy = idxb[i0*(i0+1)//2:i1*(i1+1)//2] - i0 * nao out = numpy.ndarray((nrow,dii), dtype=v.dtype, buffer=mat[k]) v = numpy.take(v, idy, axis=1, out=out) if gamma_point(kptij): h5dat[naux0:naux0+nrow] = v.real else: h5dat[naux0:naux0+nrow] = v naux0 += nrow def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, mesh) kLR = lib.transpose(numpy.asarray(Gaux.real, order='C')) kLI = lib.transpose(numpy.asarray(Gaux.imag, order='C')) j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if ('j2c-/%d' % uniq_kptji_id) in fswap: j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id]) else: j2c_negative = None if is_zero(kpt): aosym = 's2' else: aosym = 's1' if aosym == 's2' and cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = fuse(numpy.asarray(fswap[key])) if aosym == 's2' and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(ncol*Gblksize) pqkIbuf = numpy.empty(ncol*Gblksize) buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128) log.alldebug2(' blksize (%d,%d)', Gblksize, ncol) if aosym == 's2': shls_slice = (sh0, sh1, 0, sh1) else: shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v else: fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v) mpi_df._assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)
def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, gs) kLR = Gaux.T.real.copy('C') kLI = Gaux.T.imag.copy('C') j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id]) # Note large difference may be found in results between the CD/eig treatments. # In some systems, small integral errors can lead to different treatments of # linear dependency which can be observed in the total energy/orbital energy # around 4th decimal place. # try: # j2c = scipy.linalg.cholesky(j2c, lower=True) # j2ctag = 'CD' # except scipy.linalg.LinAlgError as e: # # Abandon CD treatment for better numerical stablity w, v = scipy.linalg.eigh(j2c) log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns', uniq_kptji_id, w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold)) v = v[:, w > mydf.linear_dep_threshold].T.conj() v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1) j2c = v j2ctag = 'eig' naux0 = j2c.shape[0] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1 + ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngs, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d' % ji][:naux0, col0:col1] = v del (feri['j2c/%d' % uniq_kptji_id]) for k, ji in enumerate(adapted_ji_idx): v = feri['j3c/%d' % ji][:naux0] del (feri['j3c/%d' % ji]) feri['j3c/%d' % ji] = v
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if ('j2c-/%d' % uniq_kptji_id) in fswap: j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id]) else: j2c_negative = None if is_zero(kpt): aosym = 's2' else: aosym = 's1' if aosym == 's2' and cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = numpy.asarray(fswap[key]) if aosym == 's2' and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(ncol * Gblksize) pqkIbuf = numpy.empty(ncol * Gblksize) buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128) log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol) wcoulG = mydf.weighted_coulG(kpt, False, mesh) fused_cell_slice = (auxcell.nbas, fused_cell.nbas) if aosym == 's2': shls_slice = (sh0, sh1, 0, sh1) else: shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b, gxyz[p0:p1], Gvbase, kpt) Gaux *= wcoulG[p0:p1, None] kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1) kLR = kLI = None for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v else: fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot( j2c, v) # low-dimension systems if j2c_negative is not None: fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)
def get_pnucp(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs) kpt_allow = numpy.zeros(3) if mydf.eta == 0: charge = -cell.atom_charges() #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j) SI = cell.get_SI(Gv) vGR = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.real) * kws vGI = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.imag) * kws wjR = numpy.zeros((nkpts,nao_pair)) wjI = numpy.zeros((nkpts,nao_pair)) else: nuccell = copy.copy(cell) half_sph_norm = .5/numpy.sqrt(numpy.pi) norm = half_sph_norm/mole._gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst, 'cint3c2e_pvp1_sph')) wjR = wj.real wjI = wj.imag t1 = log.timer_debug1('pnucp pass1: analytic int', *t1) charge = -cell.atom_charges() #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j) aoaux = ft_ao.ft_ao(nuccell, Gv) vGR = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.real) * kws vGI = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.imag) * kws max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if not pwdf_jk.gamma_point(kpts_lst[k]): wjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI) wjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR) wjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR) wjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI) t1 = log.timer_debug1('contracting Vnuc', *t1) if mydf.eta != 0 and cell.dimension == 3: nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)]) nucbar *= numpy.pi/cell.vol * 2 ovlp = cell.pbc_intor('cint1e_kin_sph', 1, lib.HERMITIAN, kpts_lst) for k in range(nkpts): s = lib.pack_tril(ovlp[k]) wjR[k] -= nucbar * s.real wjI[k] -= nucbar * s.imag wj = [] for k, kpt in enumerate(kpts_lst): if pwdf_jk.gamma_point(kpt): wj.append(lib.unpack_tril(wjR[k])) else: wj.append(lib.unpack_tril(wjR[k]+wjI[k]*1j)) if kpts is None or numpy.shape(kpts) == (3,): wj = wj[0] return wj
def get_nuc(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) log = logger.Logger(mydf.stdout, mydf.verbose) t0 = t1 = (time.clock(), time.time()) mesh = numpy.asarray(mydf.mesh) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 charges = cell.atom_charges() kpt_allow = numpy.zeros(3) if mydf.eta == 0: if cell.dimension > 0: ke_guess = estimate_ke_cutoff(cell, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any(mesh < mesh_guess * .8): logger.warn( mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG) v1 = -vpplocG.copy() if cell.dimension == 1 or cell.dimension == 2: G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv) coulG = 4 * numpy.pi / numpy.linalg.norm(Gv[G0idx], axis=1)**2 vpplocG[G0idx] += charges.sum() * SI_on_z * coulG vpplocG *= kws vG = vpplocG vj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128) else: if cell.dimension > 0: ke_guess = estimate_ke_cutoff_for_eta(cell, mydf.eta, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any(mesh < mesh_guess * .8): logger.warn( mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) mesh_min = numpy.min( (mesh_guess[:cell.dimension], mesh[:cell.dimension]), axis=0) mesh[:cell.dimension] = mesh_min.astype(int) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) nuccell = copy.copy(cell) half_sph_norm = .5 / numpy.sqrt(numpy.pi) norm = half_sph_norm / gto.gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) # PP-loc part1 is handled by fakenuc in _int_nuc_vloc vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst)) t0 = t1 = log.timer_debug1('vnuc pass1: analytic int', *t0) coulG = tools.get_coulG(cell, kpt_allow, mesh=mesh, Gv=Gv) * kws aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', -charges, aoaux) * coulG if cell.dimension == 1 or cell.dimension == 2: G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv) vG[G0idx] += charges.sum() * SI_on_z * coulG[G0idx] max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): for k, aoao in enumerate(aoaoks): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting Vnuc [%s:%s]' % (p0, p1), *t1) log.timer_debug1('contracting Vnuc', *t0) vj_kpts = [] for k, kpt in enumerate(kpts_lst): if gamma_point(kpt): vj_kpts.append(lib.unpack_tril(vj[k].real.copy())) else: vj_kpts.append(lib.unpack_tril(vj[k])) if kpts is None or numpy.shape(kpts) == (3, ): vj_kpts = vj_kpts[0] return numpy.asarray(vj_kpts)
def _int_nuc_vloc(mydf, nuccell, kpts, intor='int3c2e', aosym='s2', comp=1): '''Vnuc - Vloc''' cell = mydf.cell nkpts = len(kpts) # Use the 3c2e code with steep s gaussians to mimic nuclear density fakenuc = _fake_nuc(cell) fakenuc._atm, fakenuc._bas, fakenuc._env = \ gto.conc_env(nuccell._atm, nuccell._bas, nuccell._env, fakenuc._atm, fakenuc._bas, fakenuc._env) kptij_lst = numpy.hstack((kpts, kpts)).reshape(-1, 2, 3) buf = incore.aux_e2(cell, fakenuc, intor, aosym=aosym, comp=comp, kptij_lst=kptij_lst) charge = cell.atom_charges() charge = numpy.append(charge, -charge) # (charge-of-nuccell, charge-of-fakenuc) nao = cell.nao_nr() nchg = len(charge) if aosym == 's1': nao_pair = nao**2 else: nao_pair = nao * (nao + 1) // 2 if comp == 1: buf = buf.reshape(nkpts, nao_pair, nchg) mat = numpy.einsum('kxz,z->kx', buf, charge) else: buf = buf.reshape(nkpts, comp, nao_pair, nchg) mat = numpy.einsum('kcxz,z->kcx', buf, charge) if cell.dimension != 0 and intor in ('int3c2e', 'int3c2e_sph', 'int3c2e_cart'): assert (comp == 1) charge = -cell.atom_charges() if cell.dimension == 1 or cell.dimension == 2: Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh) G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv) ZSI = numpy.einsum("i,ix->x", charge, cell.get_SI(Gv[G0idx])) ZSI -= numpy.einsum('i,xi->x', charge, ft_ao.ft_ao(nuccell, Gv[G0idx])) coulG = 4 * numpy.pi / numpy.linalg.norm(Gv[G0idx], axis=1)**2 nucbar = numpy.einsum('i,i,i,i', ZSI.conj(), coulG, kws[G0idx], SI_on_z) if abs(kpts).sum() < 1e-9: nucbar = nucbar.real else: # cell.dimension == 3 nucbar = sum( [z / nuccell.bas_exp(i)[0] for i, z in enumerate(charge)]) nucbar *= numpy.pi / cell.vol ovlp = cell.pbc_intor('int1e_ovlp', 1, lib.HERMITIAN, kpts) for k in range(nkpts): if aosym == 's1': mat[k] -= nucbar * ovlp[k].reshape(nao_pair) else: mat[k] -= nucbar * lib.pack_tril(ovlp[k]) return mat
def _make_j3c(mydf, cell, auxcell, kptij_lst): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs Gv, Gvbase, kws = cell.get_Gv_weights(gs) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngs = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts) kLRs = [] kLIs = [] # chgcell = make_modchg_basis(auxcell, mydf.eta) # for k, kpt in enumerate(uniq_kpts): # aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T # coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) # LkR = aoaux.real * coulG # LkI = aoaux.imag * coulG # j2caux = numpy.zeros_like(j2c[k]) # j2caux[naux:,naux:] = j2c[naux:,naux:] # if is_zero(kpt): # kpti == kptj # j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T) # j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T) # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # vbar = fuse(mydf.auxbar(fused_cell)) # s = (vbar != 0).astype(numpy.double) # j2c[k] -= numpy.einsum('i,j->ij', vbar, s) # j2c[k] -= numpy.einsum('i,j->ij', s, vbar) # else: # j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T) # j2caux[naux:,naux:] -= j2cR + j2cI * 1j # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # #j2c[k] = fuse(fuse(j2c[k]).T).T.copy() # try: # j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True) # except scipy.linalg.LinAlgError as e: # msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that gs is not enough.\n' # '===================================') # log.error(msg) # raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) # kLR = LkR.T # kLI = LkI.T # if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR) # if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI) # kLR *= coulG.reshape(-1,1) # kLI *= coulG.reshape(-1,1) # kLRs.append(kLR) # kLIs.append(kLI) # aoaux = LkR = LkI = kLR = kLI = coulG = None for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) LkR = aoaux.real * coulG LkI = aoaux.imag * coulG if is_zero(kpt): # kpti == kptj j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T) j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T) j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T else: j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj() #j2c[k] = fuse(fuse(j2c[k]).T).T.copy() try: j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True) except scipy.linalg.LinAlgError as e: msg =('===================================\n' 'J-metric not positive definite.\n' 'It is likely that gs is not enough.\n' '===================================') log.error(msg) raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) kLR = LkR[naux:].T kLI = LkI[naux:].T if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:]) if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:]) kLR *= coulG.reshape(-1,1) kLI *= coulG.reshape(-1,1) kLRs.append(kLR) kLIs.append(kLI) aoaux = LkR = LkI = kLR = kLI = coulG = None feri = h5py.File(mydf._cderi) def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) kLR = kLRs[uniq_kptji_id] kLI = kLIs[uniq_kptji_id] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1]) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) if aosym == 's2': shls_slice = (bstart, bend, 0, bend) for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T aoao[:] = 0 lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) else: shls_slice = (bstart, bend, 0, cell.nbas) ni = ncol // nao for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.transpose(1,2,0) pqkI[:] = aoao.imag.transpose(1,2,0) aoao[:] = 0 pqkR = pqkR.reshape(-1,nG) pqkI = pqkI.reshape(-1,nG) zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k][naux:], j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v, lower=True, overwrite_b=True) feri['j3c/%d'%ji][:naux,col0:col1] = v for k, kpt in enumerate(uniq_kpts): make_kpt(k) for k, kptij in enumerate(kptij_lst): v = feri['j3c/%d'%k][:naux] del(feri['j3c/%d'%k]) feri['j3c/%d'%k] = v feri.close()
def get_pp_loc_part1(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) log = logger.Logger(mydf.stdout, mydf.verbose) t0 = t1 = (time.clock(), time.time()) mesh = numpy.asarray(mydf.mesh) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 charges = cell.atom_charges() kpt_allow = numpy.zeros(3) if mydf.eta == 0: if cell.dimension > 0: ke_guess = estimate_ke_cutoff(cell, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any(mesh[:cell.dimension] < mesh_guess[:cell.dimension]*.8): logger.warn(mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG) vpplocG *= kws vG = vpplocG vj = numpy.zeros((nkpts,nao_pair), dtype=numpy.complex128) else: if cell.dimension > 0: ke_guess = estimate_ke_cutoff_for_eta(cell, mydf.eta, cell.precision) mesh_guess = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) if numpy.any(mesh < mesh_guess*.8): logger.warn(mydf, 'mesh %s is not enough for AFTDF.get_nuc function ' 'to get integral accuracy %g.\nRecommended mesh is %s.', mesh, cell.precision, mesh_guess) mesh_min = numpy.min((mesh_guess, mesh), axis=0) if cell.dimension < 2 or cell.low_dim_ft_type == 'inf_vacuum': mesh[:cell.dimension] = mesh_min[:cell.dimension] else: mesh = mesh_min Gv, Gvbase, kws = cell.get_Gv_weights(mesh) nuccell = _compensate_nuccell(mydf) # PP-loc part1 is handled by fakenuc in _int_nuc_vloc vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst)) t0 = t1 = log.timer_debug1('vnuc pass1: analytic int', *t0) coulG = tools.get_coulG(cell, kpt_allow, mesh=mesh, Gv=Gv) * kws aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', -charges, aoaux) * coulG max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): for k, aoao in enumerate(aoaoks): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: vj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting Vnuc [%s:%s]'%(p0, p1), *t1) log.timer_debug1('contracting Vnuc', *t0) vj_kpts = [] for k, kpt in enumerate(kpts_lst): if gamma_point(kpt): vj_kpts.append(lib.unpack_tril(vj[k].real.copy())) else: vj_kpts.append(lib.unpack_tril(vj[k])) if kpts is None or numpy.shape(kpts) == (3,): vj_kpts = vj_kpts[0] return numpy.asarray(vj_kpts)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) # The ideal way to hold the temporary integrals is to store them in the # cderi_file and overwrite them inplace in the second pass. The current # HDF5 library does not have an efficient way to manage free space in # overwriting. It often leads to the cderi_file ~2 times larger than the # necessary size. For now, dumping the DF integral intermediates to a # separated temporary file can avoid this issue. The DF intermediates may # be terribly huge. The temporary file should be placed in the same disk # as cderi_file. swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2', kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr())) log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize) for k, kpt in enumerate(uniq_kpts): coulG = mydf.weighted_coulG(kpt, False, mesh) for p0, p1 in lib.prange(0, ngrids, blksize): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T LkR = numpy.asarray(aoaux.real, order='C') LkI = numpy.asarray(aoaux.imag, order='C') aoaux = None if is_zero(kpt): # kpti == kptj j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T) j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T) j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T else: j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1], LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj() LkR = LkI = None fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T j2c = coulG = None def cholesky_decomposed_metric(uniq_kptji_id): j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id]) j2c_negative = None try: j2c = scipy.linalg.cholesky(j2c, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that mesh is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg])) w, v = scipy.linalg.eigh(j2c) log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id) log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold)) v1 = v[:, w > mydf.linear_dep_threshold].conj().T v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1) j2c = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = numpy.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T w = v = None j2ctag = 'eig' return j2c, j2c_negative, j2ctag feri = h5py.File(cderi_file, 'w') feri['j3c-kptij'] = kptij_lst nsegs = len(fswap['j3c-junk/0']) def make_kpt(uniq_kptji_id, cholesky_j2c): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1, 1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngrids, 16384) def load(aux_slice): col0, col1 = aux_slice j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([ fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T for i in range(nsegs) ]) # vbar is the interaction between the background charge # and the auxiliary basis. 0D, 1D, 2D do not have vbar. if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None return j3cR, j3cI pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128) cols = [sh_range[2] for sh_range in shranges] locs = numpy.append(0, numpy.cumsum(cols)) tasks = zip(locs[:-1], locs[1:]) for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)): bstart, bend, ncol = shranges[istep] log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1, len(shranges), bstart, bend, ncol) if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d' % (ji, istep)] = v else: feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v) j3cR = j3cI = None for ji in adapted_ji_idx: del (fswap['j3c-junk/%d' % ji]) # Wrapped around boundary and symmetry between k and -k can be used # explicitly for the metric integrals. We consider this symmetry # because it is used in the df_ao2mo module when contracting two 3-index # integral tensors to the 4-index 2e integral tensor. If the symmetry # related k-points are treated separately, the resultant 3-index tensors # may have inconsistent dimension due to the numerial noise when handling # linear dependency of j2c. def conj_j2c(cholesky_j2c): j2c, j2c_negative, j2ctag = cholesky_j2c if j2c_negative is None: return j2c.conj(), None, j2ctag else: return j2c.conj(), j2c_negative.conj(), j2ctag a = cell.lattice_vectors() / (2 * numpy.pi) def kconserve_indices(kpt): '''search which (kpts+kpt) satisfies momentum conservation''' kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt) kdif_int = numpy.rint(kdif) mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL uniq_kptji_ids = numpy.where(mask)[0] return uniq_kptji_ids done = numpy.zeros(len(uniq_kpts), dtype=bool) for k, kpt in enumerate(uniq_kpts): if done[k]: continue log.debug1('Cholesky decomposition for j2c at kpt %s', k) cholesky_j2c = cholesky_decomposed_metric(k) # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the # symmetry S = S uniq_kptji_ids = kconserve_indices(-kpt) log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt) log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the # symmetry S = S* uniq_kptji_ids = kconserve_indices(kpt) log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt) log.debug1(" make_kpt for %s", uniq_kptji_ids) cholesky_j2c = conj_j2c(cholesky_j2c) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True feri.close()
def get_nuc_less_accurate(mydf, kpts=None): log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) nkpts = len(kpts_lst) if mydf._cderi is None: mydf.build() cell = mydf.cell fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell) nao = cell.nao_nr() charge = -cell.atom_charges() j2c = pgto.intor_cross('cint2c2e_sph', fused_cell, _fake_nuc(cell)) jaux = j2c.dot(charge) jaux -= charge.sum() * mydf.auxbar(fused_cell) Gv = cell.get_Gv(mydf.gs) SI = cell.get_SI(Gv) # The normal nuclues have been considered in function get_gth_vlocG_part1 # The result vG is the potential in G-space for erf part of the pp nuclues and # "numpy.dot(charge, SI) * coulG" for normal nuclues. vpplocG = pgto.pseudo.pp_int.get_gth_vlocG_part1(cell, Gv) vG = -1. / cell.vol * numpy.einsum('ij,ij->j', SI, vpplocG) kpt_allow = numpy.zeros(3) if is_zero(kpts_lst): vj = numpy.zeros((nkpts, nao**2)) else: vj = numpy.zeros((nkpts, nao**2), dtype=numpy.complex128) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(cell, mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory): if not gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,xk->x', vG.real, pqkI) * 1j vj[k] += numpy.einsum('k,xk->x', vG.imag, pqkR) * -1j vj[k] += numpy.einsum('k,xk->x', vG.real, pqkR) vj[k] += numpy.einsum('k,xk->x', vG.imag, pqkI) pqkR = pqkI = None Gv = cell.get_Gv(mydf.gs) aoaux = ft_ao.ft_ao(fused_cell, Gv) jaux -= numpy.einsum('x,xj->j', vG.real, aoaux.real) jaux -= numpy.einsum('x,xj->j', vG.imag, aoaux.imag) jaux = fuse(jaux) vj = vj.reshape(-1, nao, nao) for k, kpt in enumerate(kpts_lst): with mydf.load_Lpq((kpt, kpt)) as Lpq: v = 0 for p0, p1 in lib.prange(0, jaux.size, mydf.blockdim): v += numpy.dot(jaux[p0:p1], numpy.asarray(Lpq[p0:p1])) if gamma_point(kpt): vj[k] += lib.unpack_tril(numpy.asarray(v.real, order='C')) else: vj[k] += lib.unpack_tril(v) if kpts is None or numpy.shape(kpts) == (3, ): vj = vj[0] return vj
def get_nuc(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) if mydf._cderi is None: mydf.build() log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) auxcell = mydf.auxcell nuccell = make_modchg_basis(cell, mydf.eta, 0) nuccell._bas = numpy.asarray(nuccell._bas[nuccell._bas[:,gto.ANG_OF]==0], dtype=numpy.int32, order='C') charge = -cell.atom_charges() nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)]) nucbar *= numpy.pi/cell.vol vj = [v.ravel() for v in _int_nuc_vloc(cell, nuccell, kpts_lst)] t1 = log.timer_debug1('vnuc pass1: analytic int', *t1) j2c = pgto.intor_cross('cint2c2e_sph', auxcell, nuccell) jaux = j2c.dot(charge) kpt_allow = numpy.zeros(3) coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs) / cell.vol Gv = cell.get_Gv(mydf.gs) aoaux = ft_ao.ft_ao(nuccell, Gv) vGR = numpy.einsum('i,xi->x', charge, aoaux.real) * coulG vGI = numpy.einsum('i,xi->x', charge, aoaux.imag) * coulG max_memory = mydf.max_memory - lib.current_memory()[0] for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(cell, mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if not gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI) * 1j vj[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkR) *-1j vj[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR) vj[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI) t1 = log.timer_debug1('contracting Vnuc', *t1) # Append nuccell to auxcell, so that they can be FT together in pw_loop # the first [:naux] of ft_ao are aux fitting functions. nuccell._atm, nuccell._bas, nuccell._env = \ gto.conc_env(auxcell._atm, auxcell._bas, auxcell._env, nuccell._atm, nuccell._bas, nuccell._env) naux = auxcell.nao_nr() aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi,x->x', charge, aoaux[:,naux:], coulG) jaux -= numpy.einsum('x,xj->j', vG.real, aoaux[:,:naux].real) jaux -= numpy.einsum('x,xj->j', vG.imag, aoaux[:,:naux].imag) jaux -= charge.sum() * mydf.auxbar(auxcell) ovlp = cell.pbc_intor('cint1e_ovlp_sph', 1, lib.HERMITIAN, kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) blksize = max(16, min(int(max_memory*1e6/16/nao_pair), mydf.blockdim)) for k, kpt in enumerate(kpts_lst): with mydf.load_Lpq((kpt,kpt)) as Lpq: v = 0 for p0, p1 in lib.prange(0, jaux.size, blksize): v += numpy.dot(jaux[p0:p1], numpy.asarray(Lpq[p0:p1])) vj[k] = vj[k].reshape(nao,nao) - nucbar * ovlp[k] if gamma_point(kpt): vj[k] += lib.unpack_tril(numpy.asarray(v.real,order='C')) else: vj[k] += lib.unpack_tril(v) if kpts is None or numpy.shape(kpts) == (3,): vj = vj[0] return vj
def get_nuc(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) if mydf._cderi is None: mydf.build() log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell) nuccell = make_modchg_basis(cell, mydf.eta, 0) nuccell._bas = numpy.asarray(nuccell._bas[nuccell._bas[:, gto.ANG_OF] == 0], dtype=numpy.int32, order='C') charge = -cell.atom_charges() nucbar = sum([z / nuccell.bas_exp(i)[0] for i, z in enumerate(charge)]) nucbar *= numpy.pi / cell.vol vj = [v.ravel() for v in _int_nuc_vloc(cell, nuccell, kpts_lst)] t1 = log.timer_debug1('vnuc pass1: analytic int', *t1) j2c = pgto.intor_cross('cint2c2e_sph', fused_cell, nuccell) jaux = j2c.dot(charge) kpt_allow = numpy.zeros(3) coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs) / cell.vol Gv = cell.get_Gv(mydf.gs) aoaux = ft_ao.ft_ao(nuccell, Gv) vGR = numpy.einsum('i,xi->x', charge, aoaux.real) * coulG vGI = numpy.einsum('i,xi->x', charge, aoaux.imag) * coulG max_memory = mydf.max_memory - lib.current_memory()[0] for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(cell, mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if not gamma_point(kpts_lst[k]): vj[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI) * 1j vj[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkR) * -1j vj[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR) vj[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI) pqkR = pqkI = None t1 = log.timer_debug1('contracting Vnuc', *t1) vG = numpy.einsum('i,xi,x->x', charge, ft_ao.ft_ao(nuccell, Gv), coulG) aoaux = ft_ao.ft_ao(fused_cell, Gv) jaux -= numpy.einsum('x,xj->j', vG.real, aoaux.real) jaux -= numpy.einsum('x,xj->j', vG.imag, aoaux.imag) jaux -= charge.sum() * mydf.auxbar(fused_cell) jaux = fuse(jaux) aoaux = None ovlp = cell.pbc_intor('cint1e_ovlp_sph', 1, lib.HERMITIAN, kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) blksize = max(16, min(int(max_memory * 1e6 / 16 / nao_pair), mydf.blockdim)) for k, kpt in enumerate(kpts_lst): with mydf.load_Lpq((kpt, kpt)) as Lpq: v = 0 for p0, p1 in lib.prange(0, jaux.size, blksize): v += numpy.dot(jaux[p0:p1], numpy.asarray(Lpq[p0:p1])) vj[k] = vj[k].reshape(nao, nao) - nucbar * ovlp[k] if gamma_point(kpt): vj[k] += lib.unpack_tril(numpy.asarray(v.real, order='C')) else: vj[k] += lib.unpack_tril(v) if kpts is None or numpy.shape(kpts) == (3, ): vj = vj[0] return vj
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, mesh) kLR = lib.transpose(numpy.asarray(Gaux.real, order='C')) kLI = lib.transpose(numpy.asarray(Gaux.imag, order='C')) j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if ('j2c-/%d' % uniq_kptji_id) in fswap: j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id]) else: j2c_negative = None if is_zero(kpt): aosym = 's2' else: aosym = 's1' if aosym == 's2' and cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = fuse(numpy.asarray(fswap[key])) if aosym == 's2' and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(ncol*Gblksize) pqkIbuf = numpy.empty(ncol*Gblksize) buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128) log.alldebug2(' blksize (%d,%d)', Gblksize, ncol) if aosym == 's2': shls_slice = (sh0, sh1, 0, sh1) else: shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v else: fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)
def _make_j3c(mydf, cell, auxcell, kptij_lst): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell) outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs gxyz = lib.cartesian_prod( (numpy.append(range(gs[0] + 1), range(-gs[0], 0)), numpy.append(range(gs[1] + 1), range(-gs[1], 0)), numpy.append(range(gs[2] + 1), range(-gs[2], 0)))) invh = numpy.linalg.inv(cell._h) Gv = 2 * numpy.pi * numpy.dot(gxyz, invh) ngs = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts) kLRs = [] kLIs = [] for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, invh, gxyz, gs, kpt).T aoaux = fuse(aoaux) coulG = numpy.sqrt(tools.get_coulG(cell, kpt, gs=gs) / cell.vol) kLR = (aoaux.real * coulG).T kLI = (aoaux.imag * coulG).T if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T) if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T) j2c[k] = fuse(fuse(j2c[k]).T).T.copy() if is_zero(kpt): # kpti == kptj j2c[k] -= lib.dot(kLR.T, kLR) j2c[k] -= lib.dot(kLI.T, kLI) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI) j2c[k] -= j2cR + j2cI * 1j kLR *= coulG.reshape(-1, 1) kLI *= coulG.reshape(-1, 1) kLRs.append(kLR) kLIs.append(kLI) aoaux = kLR = kLI = j2cR = j2cI = coulG = None feri = h5py.File(mydf._cderi) # Expand approx Lpq for aosym='s1'. The approx Lpq are all in aosym='s2' mode if mydf.approx_sr_level > 0 and len(kptij_lst) > 1: Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao) def save(label, dat, col0, col1): nrow = dat.shape[0] feri[label][:nrow, col0:col1] = dat def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) kLR = kLRs[uniq_kptji_id] kLI = kLIs[uniq_kptji_id] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngs) pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1 + ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])) if mydf.approx_sr_level == 0: Lpq = numpy.asarray(feri['Lpq/%d' % idx][:, col0:col1]) elif aosym == 's2': Lpq = numpy.asarray(feri['Lpq/0'][:, col0:col1]) else: Lpq = numpy.asarray(Lpq_fake[:, col0:col1]) lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = Lpq = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T aoao[:] = 0 lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) else: shls_slice = (bstart, bend, 0, cell.nbas) ni = ncol // nao for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.transpose(1, 2, 0) pqkI[:] = aoao.imag.transpose(1, 2, 0) aoao[:] = 0 pqkR = pqkR.reshape(-1, nG) pqkI = pqkI.reshape(-1, nG) zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k], j3cI[k], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): save('j3c/%d' % ji, j3cR[k], col0, col1) else: save('j3c/%d' % ji, j3cR[k] + j3cI[k] * 1j, col0, col1) for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst): max_memory = max(2000, mydf.max_memory-pyscflib.current_memory()[0]) fused_cell, fuse = df.df.fuse_auxcell(mydf, auxcell) log = Logger(mydf.stdout, mydf.verbose) nao, nfao = cell.nao_nr(), fused_cell.nao_nr() jobs = np.arange(fused_cell.nbas) tasks = list(static_partition(jobs)) ntasks = max(comm.allgather(len(tasks))) j3c_junk = ctf.zeros([len(kptij_lst), nao**2, nfao], dtype=np.complex128) t1 = t0 = (time.clock(), time.time()) idx_full = np.arange(j3c_junk.size).reshape(j3c_junk.shape) if len(tasks) > 0: q0, q1 = tasks[0], tasks[-1] + 1 shls_slice = (0, cell.nbas, 0, cell.nbas, q0, q1) bstart, bend = fused_cell.ao_loc_nr()[q0], fused_cell.ao_loc_nr()[q1] idx = idx_full[:,:,bstart:bend].ravel() tmp = df.incore.aux_e2(cell, fused_cell, intor='int3c2e', aosym='s2', kptij_lst=kptij_lst, shls_slice=shls_slice) nao_pair = nao**2 if tmp.shape[-2] != nao_pair and tmp.ndim == 2: tmp = pyscflib.unpack_tril(tmp, axis=0).reshape(nao_pair,-1) j3c_junk.write(idx, tmp.ravel()) else: j3c_junk.write([],[]) t1 = log.timer('j3c_junk', *t1) naux = auxcell.nao_nr() mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = pyscflib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis mydf.kptij_lst = kptij_lst uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) jobs = np.arange(len(uniq_kpts)) tasks = list(static_partition(jobs)) ntasks = max(comm.allgather(len(tasks))) blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr())) log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize) j2c = ctf.zeros([len(uniq_kpts),naux,naux], dtype=np.complex128) a = cell.lattice_vectors() / (2*np.pi) def kconserve_indices(kpt): '''search which (kpts+kpt) satisfies momentum conservation''' kdif = np.einsum('wx,ix->wi', a, uniq_kpts + kpt) kdif_int = np.rint(kdif) mask = np.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL uniq_kptji_ids = np.where(mask)[0] return uniq_kptji_ids def cholesky_decomposed_metric(j2c_kptij): j2c_negative = None try: j2c_kptij = scipy.linalg.cholesky(j2c_kptij, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError as e: w, v = scipy.linalg.eigh(j2c_kptij) log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0], np.count_nonzero(w<mydf.linear_dep_threshold)) v1 = np.zeros(v.T.shape, dtype=v.dtype) v1[w>mydf.linear_dep_threshold,:] = v[:,w>mydf.linear_dep_threshold].conj().T v1[w>mydf.linear_dep_threshold,:] /= np.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) j2c_kptij = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = np.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: j2c_negative = np.zeros(v1.shape, dtype=v1.dtype) j2c_negative[idx,:] = (v[:,idx]/np.sqrt(-w[idx])).conj().T w = v = None j2ctag = 'eig' return j2c_kptij, j2c_negative, j2ctag for itask in range(ntasks): if itask >= len(tasks): j2c.write([],[]) continue k = tasks[itask] kpt = uniq_kpts[k] j2ctmp = np.asarray(fused_cell.pbc_intor('int2c2e', hermi=1, kpts=kpt)) coulG = mydf.weighted_coulG(kpt, False, mesh) for p0, p1 in pyscflib.prange(0, ngrids, blksize): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T if is_zero(kpt): j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T).real j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T else: j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T) j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T.conj() tmp = fuse(fuse(j2ctmp).T).T idx = k * naux**2 + np.arange(naux**2) j2c.write(idx, tmp.ravel()) j2ctmp = tmp = None coulG = None t1 = log.timer('j2c', *t1) j3c = ctf.zeros([len(kpt_ji),nao,nao,naux], dtype=np.complex128) jobs = np.arange(len(kpt_ji)) tasks = list(static_partition(jobs)) ntasks = max(comm.allgather(len(tasks))) for itask in range(ntasks): if itask >= len(tasks): j2c_ji = j2c.read([]) j3ctmp = j3c_junk.read([]) j3c.write([],[]) continue idx_ji = tasks[itask] kpti, kptj = kptij_lst[idx_ji] idxi, idxj = member(kpti, mydf.kpts), member(kptj, mydf.kpts) uniq_idx = uniq_inverse[idx_ji] kpt = uniq_kpts[uniq_idx] id_eq = kconserve_indices(-kpt) id_conj = kconserve_indices(kpt) id_conj = np.asarray([i for i in id_conj if i not in id_eq], dtype=int) id_full = np.hstack((id_eq, id_conj)) map_id, conj = min(id_full), np.argmin(id_full) >=len(id_eq) j2cidx = map_id * naux**2 + np.arange(naux**2) j2c_ji = j2c.read(j2cidx).reshape(naux, naux) # read to be added j2c_ji, j2c_negative, j2ctag = cholesky_decomposed_metric(j2c_ji) if conj: j2c_ji = j2c_ji.conj() shls_slice= (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1,1) j3c_id = idx_ji * nao**2*nfao + np.arange(nao**2*nfao) j3ctmp = j3c_junk.read(j3c_id).reshape(nao**2, fused_cell.nao_nr()).T if is_zero(kpt): # kpti == kptj if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj) for i in np.where(vbar != 0)[0]: j3ctmp[i] -= vbar[i] * ovlp.reshape(-1) aoao = ft_ao._ft_aopair_kpts(cell, Gv, None, 's1', b, gxyz, Gvbase, kpt, kptj)[0].reshape(len(Gv),-1) j3ctmp[naux:] -= np.dot(Gaux.T.conj(), aoao) j3ctmp = fuse(j3ctmp) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c_ji, j3ctmp, lower=True, overwrite_b=True) else: v = np.dot(j2c_ji, j3ctmp) v = v.T.reshape(nao,nao,naux) j3c_id = idx_ji * nao**2*naux + np.arange(nao**2*naux) j3c.write(j3c_id, v.ravel()) mydf.j3c = j3c return None
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) # The ideal way to hold the temporary integrals is to store them in the # cderi_file and overwrite them inplace in the second pass. The current # HDF5 library does not have an efficient way to manage free space in # overwriting. It often leads to the cderi_file ~2 times larger than the # necessary size. For now, dumping the DF integral intermediates to a # separated temporary file can avoid this issue. The DF intermediates may # be terribly huge. The temporary file should be placed in the same disk # as cderi_file. swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2', kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr())) log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize) for k, kpt in enumerate(uniq_kpts): coulG = mydf.weighted_coulG(kpt, False, mesh) for p0, p1 in lib.prange(0, ngrids, blksize): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T LkR = numpy.asarray(aoaux.real, order='C') LkI = numpy.asarray(aoaux.imag, order='C') aoaux = None if is_zero(kpt): # kpti == kptj j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T) j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T) j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T else: j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1], LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj() LkR = LkI = None fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T j2c = coulG = None def cholesky_decomposed_metric(uniq_kptji_id): j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) j2c_negative = None try: j2c = scipy.linalg.cholesky(j2c, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that mesh is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg])) w, v = scipy.linalg.eigh(j2c) log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id) log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold)) v1 = v[:,w>mydf.linear_dep_threshold].conj().T v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) j2c = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = numpy.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T w = v = None j2ctag = 'eig' return j2c, j2c_negative, j2ctag feri = h5py.File(cderi_file, 'w') feri['j3c-kptij'] = kptij_lst nsegs = len(fswap['j3c-junk/0']) def make_kpt(uniq_kptji_id, cholesky_j2c): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1,1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d'%(ji,istep)] = v else: feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v) with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]) # vbar is the interaction between the background charge # and the auxiliary basis. 0D, 1D, 2D do not have vbar. if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji]) # Wrapped around boundary and symmetry between k and -k can be used # explicitly for the metric integrals. We consider this symmetry # because it is used in the df_ao2mo module when contracting two 3-index # integral tensors to the 4-index 2e integral tensor. If the symmetry # related k-points are treated separately, the resultant 3-index tensors # may have inconsistent dimension due to the numerial noise when handling # linear dependency of j2c. def conj_j2c(cholesky_j2c): j2c, j2c_negative, j2ctag = cholesky_j2c if j2c_negative is None: return j2c.conj(), None, j2ctag else: return j2c.conj(), j2c_negative.conj(), j2ctag a = cell.lattice_vectors() / (2*numpy.pi) def kconserve_indices(kpt): '''search which (kpts+kpt) satisfies momentum conservation''' kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt) kdif_int = numpy.rint(kdif) mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL uniq_kptji_ids = numpy.where(mask)[0] return uniq_kptji_ids done = numpy.zeros(len(uniq_kpts), dtype=bool) for k, kpt in enumerate(uniq_kpts): if done[k]: continue log.debug1('Cholesky decomposition for j2c at kpt %s', k) cholesky_j2c = cholesky_decomposed_metric(k) # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the # symmetry S = S uniq_kptji_ids = kconserve_indices(-kpt) log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt) log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the # symmetry S = S* uniq_kptji_ids = kconserve_indices(kpt) log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt) log.debug1(" make_kpt for %s", uniq_kptji_ids) cholesky_j2c = conj_j2c(cholesky_j2c) for uniq_kptji_id in uniq_kptji_ids: if not done[uniq_kptji_id]: make_kpt(uniq_kptji_id, cholesky_j2c) done[uniq_kptji_ids] = True feri.close()
def get_pnucp(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs) kpt_allow = numpy.zeros(3) if mydf.eta == 0: charge = -cell.atom_charges() #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j) SI = cell.get_SI(Gv) vGR = numpy.einsum('i,ix->x', 4 * numpy.pi * charge, SI.real) * kws vGI = numpy.einsum('i,ix->x', 4 * numpy.pi * charge, SI.imag) * kws wjR = numpy.zeros((nkpts, nao_pair)) wjI = numpy.zeros((nkpts, nao_pair)) else: nuccell = copy.copy(cell) half_sph_norm = .5 / numpy.sqrt(numpy.pi) norm = half_sph_norm / mole._gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) wj = lib.asarray( mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvp1_sph')) wjR = wj.real wjI = wj.imag t1 = log.timer_debug1('pnucp pass1: analytic int', *t1) charge = -cell.atom_charges() #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j) aoaux = ft_ao.ft_ao(nuccell, Gv) vGR = numpy.einsum('i,xi->x', 4 * numpy.pi * charge, aoaux.real) * kws vGI = numpy.einsum('i,xi->x', 4 * numpy.pi * charge, aoaux.imag) * kws max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2'): # rho_ij(G) nuc(-G) / G^2 # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2 if not aft_jk.gamma_point(kpts_lst[k]): wjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI) wjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR) wjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR) wjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI) t1 = log.timer_debug1('contracting Vnuc', *t1) if mydf.eta != 0 and cell.dimension == 3: nucbar = sum([z / nuccell.bas_exp(i)[0] for i, z in enumerate(charge)]) nucbar *= numpy.pi / cell.vol * 2 ovlp = cell.pbc_intor('int1e_kin_sph', 1, lib.HERMITIAN, kpts_lst) for k in range(nkpts): s = lib.pack_tril(ovlp[k]) wjR[k] -= nucbar * s.real wjI[k] -= nucbar * s.imag wj = [] for k, kpt in enumerate(kpts_lst): if aft_jk.gamma_point(kpt): wj.append(lib.unpack_tril(wjR[k])) else: wj.append(lib.unpack_tril(wjR[k] + wjI[k] * 1j)) if kpts is None or numpy.shape(kpts) == (3, ): wj = wj[0] return wj
def _make_j3c(mydf, cell, auxcell, kptij_lst): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs Gv, Gvbase, kws = cell.get_Gv_weights(gs) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngs = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts) kLRs = [] kLIs = [] # An alternative method to evalute j2c. This method might have larger numerical error? # chgcell = make_modchg_basis(auxcell, mydf.eta) # for k, kpt in enumerate(uniq_kpts): # aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T # coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) # LkR = aoaux.real * coulG # LkI = aoaux.imag * coulG # j2caux = numpy.zeros_like(j2c[k]) # j2caux[naux:,naux:] = j2c[k][naux:,naux:] # if is_zero(kpt): # kpti == kptj # j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T) # j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T) # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # vbar = fuse(mydf.auxbar(fused_cell)) # s = (vbar != 0).astype(numpy.double) # j2c[k] -= numpy.einsum('i,j->ij', vbar, s) # j2c[k] -= numpy.einsum('i,j->ij', s, vbar) # else: # j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T) # j2caux[naux:,naux:] -= j2cR + j2cI * 1j # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # # try: # j2c[k] = scipy.linalg.cholesky(j2c[k], lower=True) # except scipy.linalg.LinAlgError as e: # msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that gs is not enough.\n' # '===================================') # log.error(msg) # raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) # kLR = LkR.T # kLI = LkI.T # if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR) # if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI) # kLR *= coulG.reshape(-1,1) # kLI *= coulG.reshape(-1,1) # kLRs.append(kLR) # kLIs.append(kLI) # aoaux = LkR = LkI = kLR = kLI = coulG = None for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) LkR = aoaux.real * coulG LkI = aoaux.imag * coulG if is_zero(kpt): # kpti == kptj j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T) j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T) j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T else: j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj() j2c[k] = fuse(fuse(j2c[k]).T).T try: j2c[k] = ('CD', scipy.linalg.cholesky(j2c[k], lower=True)) except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that gs is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) w, v = scipy.linalg.eigh(j2c[k]) log.debug2('metric linear dependency for kpt %s', k) log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1], numpy.count_nonzero(w < LINEAR_DEP_THR)) v = v[:, w > LINEAR_DEP_THR].T.conj() v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1) j2c[k] = ('eig', v) kLR = LkR[naux:].T kLI = LkI[naux:].T if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:]) if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:]) kLR *= coulG.reshape(-1, 1) kLI *= coulG.reshape(-1, 1) kLRs.append(kLR) kLIs.append(kLI) aoaux = LkR = LkI = kLR = kLI = coulG = None nauxs = [v[1].shape[0] for v in j2c] feri = h5py.File(mydf._cderi) def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) kLR = kLRs[uniq_kptji_id] kLI = kLIs[uniq_kptji_id] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1 + ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T aoao[:] = 0 lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) else: shls_slice = (bstart, bend, 0, cell.nbas) ni = ncol // nao for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.transpose(1, 2, 0) pqkI[:] = aoao.imag.transpose(1, 2, 0) aoao[:] = 0 pqkR = pqkR.reshape(-1, nG) pqkI = pqkI.reshape(-1, nG) zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k][naux:], j3cI[k][naux:], 1) naux0 = nauxs[uniq_kptji_id] for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2c[uniq_kptji_id][0] == 'CD': v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1], v, lower=True, overwrite_b=True) else: v = lib.dot(j2c[uniq_kptji_id][1], v) feri['j3c/%d' % ji][:naux0, col0:col1] = v naux0 = nauxs[uniq_kptji_id] for k, ji in enumerate(adapted_ji_idx): v = feri['j3c/%d' % ji][:naux0] del (feri['j3c/%d' % ji]) feri['j3c/%d' % ji] = v for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if is_zero(kpt): aosym = 's2' else: aosym = 's1' j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = numpy.asarray(feri[key]) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(ncol * Gblksize) pqkIbuf = numpy.empty(ncol * Gblksize) buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128) log.alldebug2(' blksize (%d,%d)', Gblksize, ncol) shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngs, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
def make_kpt(uniq_kptji_id, cholesky_j2c): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1,1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d'%(ji,istep)] = v else: feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v) with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]) # vbar is the interaction between the background charge # and the auxiliary basis. 0D, 1D, 2D do not have vbar. if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji])
def _make_j3c(mydf, cell, auxcell, chgcell, kptij_lst): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) auxcell = copy.copy(auxcell) auxcell._atm, auxcell._bas, auxcell._env = \ gto.conc_env(auxcell._atm, auxcell._bas, auxcell._env, chgcell._atm, chgcell._bas, chgcell._env) outcore.aux_e2(cell, auxcell, mydf._cderi, 'cint3c2e_sph', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs gxyz = lib.cartesian_prod((numpy.append(range(gs[0]+1), range(-gs[0],0)), numpy.append(range(gs[1]+1), range(-gs[1],0)), numpy.append(range(gs[2]+1), range(-gs[2],0)))) invh = numpy.linalg.inv(cell._h) Gv = 2*numpy.pi * numpy.dot(gxyz, invh) ngs = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) # j2c ~ (-kpt_ji | kpt_ji) j2c = auxcell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts) kLRs = [] kLIs = [] for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(auxcell, Gv, None, invh, gxyz, gs, kpt) coulG = tools.get_coulG(cell, kpt, gs=gs) / cell.vol aoauxG = aoaux * coulG.reshape(-1,1) kLRs.append(numpy.asarray(aoauxG.real, order='C')) kLIs.append(numpy.asarray(aoauxG.imag, order='C')) if is_zero(kpt): # kpti == kptj j2c[k] -= lib.dot(kLRs[-1].T, numpy.asarray(aoaux.real,order='C')) j2c[k] -= lib.dot(kLIs[-1].T, numpy.asarray(aoaux.imag,order='C')) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2c[k] -= lib.dot(aoauxG.conj().T, aoaux) aoaux = coulG = None feri = h5py.File(mydf._cderi) # Expand approx Lpq for aosym='s1'. The approx Lpq are all in aosym='s2' mode if mydf.approx_sr_level > 0 and len(kptij_lst) > 1: Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao) def save(label, dat, col0, col1): feri[label][:,col0:col1] = dat def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) kLR = kLRs[uniq_kptji_id] kLI = kLIs[uniq_kptji_id] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 vbar = mydf.auxbar(auxcell) ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngs) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1]) if mydf.approx_sr_level == 0: Lpq = numpy.asarray(feri['Lpq/%d'%idx][:,col0:col1]) elif aosym == 's2': Lpq = numpy.asarray(feri['Lpq/0'][:,col0:col1]) else: Lpq = numpy.asarray(Lpq_fake[:,col0:col1]) lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1) j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = Lpq = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T aoao[:] = 0 lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) else: shls_slice = (bstart, bend, 0, cell.nbas) ni = ncol // nao for p0, p1 in lib.prange(0, ngs, Gblksize): ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128, order='F', buffer=buf[k]) pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.transpose(1,2,0) pqkI[:] = aoao.imag.transpose(1,2,0) aoao[:] = 0 pqkR = pqkR.reshape(-1,nG) pqkI = pqkI.reshape(-1,nG) zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k], j3cI[k], 1) if is_zero(kpt): for k, ji in enumerate(adapted_ji_idx): if gamma_point(adapted_kptjs[k]): for i, c in enumerate(vbar): if c != 0: j3cR[k][i] -= c * ovlp[k][col0:col1].real else: for i, c in enumerate(vbar): if c != 0: j3cR[k][i] -= c * ovlp[k][col0:col1].real j3cI[k][i] -= c * ovlp[k][col0:col1].imag for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): save('j3c/%d'%ji, j3cR[k], col0, col1) else: save('j3c/%d'%ji, j3cR[k]+j3cI[k]*1j, col0, col1) for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def get_pnucp(mydf, kpts=None): cell = mydf.cell if kpts is None: kpts_lst = numpy.zeros((1, 3)) else: kpts_lst = numpy.reshape(kpts, (-1, 3)) log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (logger.process_clock(), logger.perf_counter()) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao + 1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh) charge = -cell.atom_charges() kpt_allow = numpy.zeros(3) coulG = tools.get_coulG(cell, kpt_allow, mesh=mydf.mesh, Gv=Gv) coulG *= kws if mydf.eta == 0: wj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128) SI = cell.get_SI(Gv) vG = numpy.einsum('i,ix->x', charge, SI) * coulG wj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128) else: nuccell = copy.copy(cell) half_sph_norm = .5 / numpy.sqrt(numpy.pi) norm = half_sph_norm / mole.gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvp1')) t1 = log.timer_debug1('pnucp pass1: analytic int', *t1) aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG if cell.dimension == 3: nucbar = sum( [z / nuccell.bas_exp(i)[0] for i, z in enumerate(charge)]) nucbar *= numpy.pi / cell.vol ovlp = cell.pbc_intor('int1e_kin', 1, lib.HERMITIAN, kpts_lst) for k in range(nkpts): s = lib.pack_tril(ovlp[k]) # *2 due to the factor 1/2 in T wj[k] -= nucbar * 2 * s max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s2', intor='GTO_ft_pdotp'): for k, aoao in enumerate(aoaoks): if aft_jk.gamma_point(kpts_lst[k]): wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real) wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag) else: wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting pnucp', *t1) wj_kpts = [] for k, kpt in enumerate(kpts_lst): if aft_jk.gamma_point(kpt): wj_kpts.append(lib.unpack_tril(wj[k].real.copy())) else: wj_kpts.append(lib.unpack_tril(wj[k])) if kpts is None or numpy.shape(kpts) == (3, ): wj_kpts = wj_kpts[0] return numpy.asarray(wj_kpts)
def _gaussian_int(cell): r'''Regular gaussian integral \int g(r) dr^3''' return ft_ao.ft_ao(cell, numpy.zeros((1,3)))[0].real
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell) ao_loc = cell.ao_loc_nr() nao = ao_loc[-1] naux = auxcell.nao_nr() nkptij = len(kptij_lst) gs = mydf.gs Gv, Gvbase, kws = cell.get_Gv_weights(gs) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngs = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts) j2ctags = [] nauxs = [] t1 = log.timer_debug1('2c2e', *t1) if h5py.is_hdf5(cderi_file): feri = h5py.File(cderi_file) else: feri = h5py.File(cderi_file, 'w') for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) kLR = (aoaux.real * coulG).T kLI = (aoaux.imag * coulG).T if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T) if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T) aoaux = None kLR1 = numpy.asarray(kLR[:, naux:], order='C') kLI1 = numpy.asarray(kLI[:, naux:], order='C') if is_zero(kpt): # kpti == kptj for p0, p1 in mydf.mpi_prange(0, ngs): j2cR = lib.ddot(kLR1[p0:p1].T, kLR[p0:p1]) j2cR = lib.ddot(kLI1[p0:p1].T, kLI[p0:p1], 1, j2cR, 1) j2c[k][naux:] -= mpi.allreduce(j2cR) j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T else: for p0, p1 in mydf.mpi_prange(0, ngs): j2cR, j2cI = zdotCN(kLR1[p0:p1].T, kLI1[p0:p1].T, kLR[p0:p1], kLI[p0:p1]) j2cR = mpi.allreduce(j2cR) j2cI = mpi.allreduce(j2cI) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj() j2c[k] = fuse(fuse(j2c[k]).T).T try: feri['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True) j2ctags.append('CD') nauxs.append(naux) except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that gs is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) w, v = scipy.linalg.eigh(j2c) log.debug2('metric linear dependency for kpt %s', uniq_kptji_id) log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1], numpy.count_nonzero(w < LINEAR_DEP_THR)) v = v[:, w > LINEAR_DEP_THR].T.conj() v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1) feri['j2c/%d' % k] = v j2ctags.append('eig') nauxs.append(v.shape[0]) kLR = kLI = kLR1 = kLI1 = coulG = None j2c = None aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9 j_only = numpy.all(aosym_s2) if gamma_point(kptij_lst): dtype = 'f8' else: dtype = 'c16' vbar = mydf.auxbar(fused_cell) vbar = fuse(vbar) ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=kptjs[aosym_s2]) ovlp = [lib.pack_tril(s) for s in ovlp] t1 = log.timer_debug1('aoaux and int2c', *t1) # Estimates the buffer size based on the last contraction in G-space. # This contraction requires to hold nkptj copies of (naux,?) array # simultaneously in memory. mem_now = max(comm.allgather(lib.current_memory()[0])) max_memory = max(2000, mydf.max_memory - mem_now) nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse)) buflen = max( int( min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao, nao / 3 / mpi.pool.size)), 1) chunks = (buflen, nao) j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only) log.debug1('max_memory = %d MB (%d in use) chunks %s', max_memory, mem_now, chunks) log.debug2('j3c_jobs %s', j3c_jobs) if j_only: int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's2', 1, kptij_lst) else: int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's1', 1, kptij_lst) idxb = numpy.tril_indices(nao) idxb = (idxb[0] * nao + idxb[1]).astype('i') aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e_sph') def gen_int3c(auxcell, job_id, ish0, ish1): dataname = 'j3c-chunks/%d' % job_id if dataname in feri: del (feri[dataname]) i0 = ao_loc[ish0] i1 = ao_loc[ish1] dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2 dij = (i1 - i0) * nao if j_only: buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii))) else: buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij))) auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen) buflen = max([x[2] for x in auxranges]) buf = numpy.empty(nkptij * dij * buflen, dtype=dtype) buf1 = numpy.empty(dij * buflen, dtype=dtype) naux = aux_loc[-1] for kpt_id, kptij in enumerate(kptij_lst): key = '%s/%d' % (dataname, kpt_id) if aosym_s2[kpt_id]: shape = (naux, dii) else: shape = (naux, dij) if gamma_point(kptij): feri.create_dataset(key, shape, 'f8') else: feri.create_dataset(key, shape, 'c16') naux0 = 0 for istep, auxrange in enumerate(auxranges): log.alldebug2("aux_e2 job_id %d step %d", job_id, istep) sh0, sh1, nrow = auxrange sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1) if j_only: mat = numpy.ndarray((nkptij, dii, nrow), dtype=dtype, buffer=buf) else: mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype, buffer=buf) mat = int3c(sub_slice, mat) for k, kptij in enumerate(kptij_lst): h5dat = feri['%s/%d' % (dataname, k)] v = lib.transpose(mat[k], out=buf1) if not j_only and aosym_s2[k]: idy = idxb[i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2] - i0 * nao out = numpy.ndarray((nrow, dii), dtype=v.dtype, buffer=mat[k]) v = numpy.take(v, idy, axis=1, out=out) if gamma_point(kptij): h5dat[naux0:naux0 + nrow] = v.real else: h5dat[naux0:naux0 + nrow] = v naux0 += nrow def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if is_zero(kpt): aosym = 's2' else: aosym = 's1' j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = numpy.asarray(feri[key]) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(ncol * Gblksize) pqkIbuf = numpy.empty(ncol * Gblksize) buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128) log.alldebug2(' blksize (%d,%d)', Gblksize, ncol) shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngs, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v t2 = t1 j3c_workers = numpy.zeros(len(j3c_jobs), dtype=int) #for job_id, ish0, ish1 in mpi.work_share_partition(j3c_jobs): for job_id, ish0, ish1 in mpi.work_stealing_partition(j3c_jobs): gen_int3c(fused_cell, job_id, ish0, ish1) t2 = log.alltimer_debug2('int j3c %d' % job_id, *t2) for k, kpt in enumerate(uniq_kpts): ft_fuse(job_id, k, ish0, ish1) t2 = log.alltimer_debug2('ft-fuse %d k %d' % (job_id, k), *t2) j3c_workers[job_id] = rank j3c_workers = mpi.allreduce(j3c_workers) log.debug2('j3c_workers %s', j3c_workers) j2c = kLRs = kLIs = ovlp = vbar = fuse = gen_int3c = ft_fuse = None t1 = log.timer_debug1('int3c and fuse', *t1) def get_segs_loc(aosym): off0 = numpy.asarray([ao_loc[i0] for x, i0, i1 in j3c_jobs]) off1 = numpy.asarray([ao_loc[i1] for x, i0, i1 in j3c_jobs]) if aosym: # s2 dims = off1 * (off1 + 1) // 2 - off0 * (off0 + 1) // 2 else: dims = (off1 - off0) * nao #dims = numpy.asarray([ao_loc[i1]-ao_loc[i0] for x,i0,i1 in j3c_jobs]) dims = numpy.hstack( [dims[j3c_workers == w] for w in range(mpi.pool.size)]) job_idx = numpy.hstack( [numpy.where(j3c_workers == w)[0] for w in range(mpi.pool.size)]) segs_loc = numpy.append(0, numpy.cumsum(dims)) segs_loc = [(segs_loc[j], segs_loc[j + 1]) for j in numpy.argsort(job_idx)] return segs_loc segs_loc_s1 = get_segs_loc(False) segs_loc_s2 = get_segs_loc(True) if 'j3c' in feri: del (feri['j3c']) segsize = (max(nauxs) + mpi.pool.size - 1) // mpi.pool.size naux0 = rank * segsize for k, kptij in enumerate(kptij_lst): naux1 = min(nauxs[uniq_inverse[k]], naux0 + segsize) nrow = max(0, naux1 - naux0) if gamma_point(kptij): dtype = 'f8' else: dtype = 'c16' if aosym_s2[k]: nao_pair = nao * (nao + 1) // 2 else: nao_pair = nao * nao feri.create_dataset('j3c/%d' % k, (nrow, nao_pair), dtype, maxshape=(None, nao_pair)) def load(k, p0, p1): naux1 = nauxs[uniq_inverse[k]] slices = [(min(i * segsize + p0, naux1), min(i * segsize + p1, naux1)) for i in range(mpi.pool.size)] segs = [] for p0, p1 in slices: val = [] for job_id, worker in enumerate(j3c_workers): if rank == worker: key = 'j3c-chunks/%d/%d' % (job_id, k) val.append(feri[key][p0:p1].ravel()) if val: segs.append(numpy.hstack(val)) else: segs.append(numpy.zeros(0)) return segs def save(k, p0, p1, segs): segs = mpi.alltoall(segs) naux1 = nauxs[uniq_inverse[k]] loc0, loc1 = min(p0, naux1 - naux0), min(p1, naux1 - naux0) nL = loc1 - loc0 if nL > 0: if aosym_s2[k]: segs = numpy.hstack([ segs[i0 * nL:i1 * nL].reshape(nL, -1) for i0, i1 in segs_loc_s2 ]) else: segs = numpy.hstack([ segs[i0 * nL:i1 * nL].reshape(nL, -1) for i0, i1 in segs_loc_s1 ]) feri['j3c/%d' % k][loc0:loc1] = segs mem_now = max(comm.allgather(lib.current_memory()[0])) max_memory = max(2000, min(8000, mydf.max_memory - mem_now)) if numpy.all(aosym_s2): if gamma_point(kptij_lst): blksize = max(16, int(max_memory * .5e6 / 8 / nao**2)) else: blksize = max(16, int(max_memory * .5e6 / 16 / nao**2)) else: blksize = max(16, int(max_memory * .5e6 / 16 / nao**2 / 2)) log.debug1('max_momory %d MB (%d in use), blksize %d', max_memory, mem_now, blksize) t2 = t1 with lib.call_in_background(save) as async_write: for k, kptji in enumerate(kptij_lst): for p0, p1 in lib.prange(0, segsize, blksize): segs = load(k, p0, p1) async_write(k, p0, p1, segs) t2 = log.timer_debug1( 'assemble k=%d %d:%d (in %d)' % (k, p0, p1, segsize), *t2) if 'j3c-chunks' in feri: del (feri['j3c-chunks']) if 'j3c-kptij' in feri: del (feri['j3c-kptij']) feri['j3c-kptij'] = kptij_lst t1 = log.alltimer_debug1('assembling j3c', *t1) feri.close()
def get_pbc_pvxp(cell, kpts=None): import numpy import copy import time from pyscf import lib from pyscf.lib import logger from pyscf.pbc import tools from pyscf.gto import mole from pyscf.pbc.df import ft_ao from pyscf.pbc.df import aft_jk from pyscf.pbc.df import aft if kpts is None: kpts_lst = numpy.zeros((1,3)) else: kpts_lst = numpy.reshape(kpts, (-1,3)) log = logger.Logger(cell.stdout, cell.verbose) t1 = t0 = (time.clock(), time.time()) mydf = aft.AFTDF(cell, kpts) mydf.eta = 0.2 ke_guess = aft.estimate_ke_cutoff_for_eta(cell, mydf.eta, cell.precision) mydf.mesh = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_guess) log.debug('mydf.mesh %s', mydf.mesh) nkpts = len(kpts_lst) nao = cell.nao_nr() nao_pair = nao * (nao+1) // 2 Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh) charge = -cell.atom_charges() # Apply Koseki effective charge? kpt_allow = numpy.zeros(3) coulG = tools.get_coulG(cell, kpt_allow, mesh=mydf.mesh, Gv=Gv) coulG *= kws if mydf.eta == 0: soc_mat = numpy.zeros((nkpts,3,nao*nao), dtype=numpy.complex128) SI = cell.get_SI(Gv) vG = numpy.einsum('i,ix->x', charge, SI) * coulG else: nuccell = copy.copy(cell) half_sph_norm = .5/numpy.sqrt(numpy.pi) norm = half_sph_norm/mole.gaussian_int(2, mydf.eta) chg_env = [mydf.eta, norm] ptr_eta = cell._env.size ptr_norm = ptr_eta + 1 chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)] nuccell._atm = cell._atm nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32) nuccell._env = numpy.hstack((cell._env, chg_env)) soc_mat = mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvxp1_sph', aosym='s1', comp=3) soc_mat = numpy.asarray(soc_mat).reshape(nkpts,3,nao**2) t1 = log.timer_debug1('pnucp pass1: analytic int', *t1) aoaux = ft_ao.ft_ao(nuccell, Gv) vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, kpt_allow, kpts_lst, max_memory=max_memory, aosym='s1', intor='GTO_ft_pxp_sph', comp=3): for k, aoao in enumerate(aoaoks): aoao = aoao.reshape(3,-1,nao**2) if aft_jk.gamma_point(kpts_lst[k]): soc_mat[k] += numpy.einsum('k,ckx->cx', vG[p0:p1].real, aoao.real) soc_mat[k] += numpy.einsum('k,ckx->cx', vG[p0:p1].imag, aoao.imag) else: soc_mat[k] += numpy.einsum('k,ckx->cx', vG[p0:p1].conj(), aoao) t1 = log.timer_debug1('contracting pnucp', *t1) soc_mat_kpts = [] for k, kpt in enumerate(kpts_lst): if aft_jk.gamma_point(kpt): soc_mat_kpts.append(soc_mat[k].real.reshape(3,nao,nao)) else: soc_mat_kpts.append(soc_mat[k].reshape(3,nao,nao)) if kpts is None or numpy.shape(kpts) == (3,): soc_mat_kpts = soc_mat_kpts[0] return numpy.asarray(soc_mat_kpts)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) outcore.aux_e2(cell, fused_cell, cderi_file, 'int3c2e_sph', aosym='s2', kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() gs = mydf.gs Gv, Gvbase, kws = cell.get_Gv_weights(gs) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngs = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts) feri = h5py.File(cderi_file) for k, kpt in enumerate(uniq_kpts): aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T aoaux = fuse(aoaux) coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs)) kLR = (aoaux.real * coulG).T kLI = (aoaux.imag * coulG).T if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T) if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T) j2c_k = fuse(fuse(j2c[k]).T).T.copy() if is_zero(kpt): # kpti == kptj j2c_k -= lib.dot(kLR.T, kLR) j2c_k -= lib.dot(kLI.T, kLI) else: # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI) j2c_k -= j2cR + j2cI * 1j feri['j2c/%d' % k] = j2c_k aoaux = kLR = kLI = j2cR = j2cI = coulG = None j2c = None def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, gs) kLR = Gaux.T.real.copy('C') kLI = Gaux.T.imag.copy('C') j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id]) # Note large difference may be found in results between the CD/eig treatments. # In some systems, small integral errors can lead to different treatments of # linear dependency which can be observed in the total energy/orbital energy # around 4th decimal place. # try: # j2c = scipy.linalg.cholesky(j2c, lower=True) # j2ctag = 'CD' # except scipy.linalg.LinAlgError as e: # # Abandon CD treatment for better numerical stablity w, v = scipy.linalg.eigh(j2c) log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns', uniq_kptji_id, w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold)) v = v[:, w > mydf.linear_dep_threshold].T.conj() v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1) j2c = v j2ctag = 'eig' naux0 = j2c.shape[0] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1 + ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngs, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d' % ji][:naux0, col0:col1] = v del (feri['j2c/%d' % uniq_kptji_id]) for k, ji in enumerate(adapted_ji_idx): v = feri['j3c/%d' % ji][:naux0] del (feri['j3c/%d' % ji]) feri['j3c/%d' % ji] = v for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): t1 = (time.clock(), time.time()) log = logger.Logger(mydf.stdout, mydf.verbose) max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]) fused_cell, fuse = fuse_auxcell(mydf, auxcell) # The ideal way to hold the temporary integrals is to store them in the # cderi_file and overwrite them inplace in the second pass. The current # HDF5 library does not have an efficient way to manage free space in # overwriting. It often leads to the cderi_file ~2 times larger than the # necessary size. For now, dumping the DF integral intermediates to a # separated temporary file can avoid this issue. The DF intermediates may # be terribly huge. The temporary file should be placed in the same disk # as cderi_file. swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2', kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory) t1 = log.timer_debug1('3c2e', *t1) nao = cell.nao_nr() naux = auxcell.nao_nr() mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:,0] kptjs = kptij_lst[:,1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) # An alternative method to evalute j2c. This method might have larger numerical error? # chgcell = make_modchg_basis(auxcell, mydf.eta) # for k, kpt in enumerate(uniq_kpts): # aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T # coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh)) # LkR = aoaux.real * coulG # LkI = aoaux.imag * coulG # j2caux = numpy.zeros_like(j2c[k]) # j2caux[naux:,naux:] = j2c[k][naux:,naux:] # if is_zero(kpt): # kpti == kptj # j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T) # j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T) # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # vbar = fuse(mydf.auxbar(fused_cell)) # s = (vbar != 0).astype(numpy.double) # j2c[k] -= numpy.einsum('i,j->ij', vbar, s) # j2c[k] -= numpy.einsum('i,j->ij', s, vbar) # else: # j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T) # j2caux[naux:,naux:] -= j2cR + j2cI * 1j # j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T) # fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T # aoaux = LkR = LkI = coulG = None if cell.dimension == 1 or cell.dimension == 2: plain_ints = _gaussian_int(fused_cell) max_memory = max(2000, mydf.max_memory - lib.current_memory()[0]) blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr())) log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize) for k, kpt in enumerate(uniq_kpts): coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh)) for p0, p1 in lib.prange(0, ngrids, blksize): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1]) aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints) aoaux = aoaux.T LkR = aoaux.real * coulG[p0:p1] LkI = aoaux.imag * coulG[p0:p1] aoaux = None if is_zero(kpt): # kpti == kptj j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T) j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T) j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T else: j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T) j2c[k][naux:] -= j2cR + j2cI * 1j j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj() LkR = LkI = None fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T j2c = coulG = None feri = h5py.File(cderi_file, 'w') feri['j3c-kptij'] = kptij_lst nsegs = len(fswap['j3c-junk/0']) def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv) s = plain_ints[-Gaux.shape[1]:] # Only compensated Gaussians Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1,1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) try: j2c = scipy.linalg.cholesky(j2c, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that mesh is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) w, v = scipy.linalg.eigh(j2c) log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id) log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold)) v = v[:,w>mydf.linear_dep_threshold].T.conj() v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) j2c = v j2ctag = 'eig' naux0 = j2c.shape[0] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 vbar = mydf.auxbar(fused_cell) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1]) if SI_on_z.size > 0: for k, aoao in enumerate(dat): aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k]) aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T) vG_mod = numpy.einsum('ig,g,g->i', aux.conj(), wcoulG[p0:p1][G0idx], SI_on_z) if gamma_point(adapted_kptjs[k]): j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k] else: tmp = vG_mod[:,None] * ovlp[k] j3cR[k][:naux] -= tmp.real j3cI[k][:naux] -= tmp.imag tmp = aux = vG_mod nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d/%d'%(ji,istep)] = v with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]) if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji]) for k, kpt in enumerate(uniq_kpts): make_kpt(k) feri.close()
def _gaussian_int(cell): r'''Regular gaussian integral \int g(r) dr^3''' return ft_ao.ft_ao(cell, numpy.zeros((1, 3)))[0].real
def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv) s = plain_ints[-Gaux.shape[1]:] # Only compensated Gaussians Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1,1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id]) try: j2c = scipy.linalg.cholesky(j2c, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that mesh is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) w, v = scipy.linalg.eigh(j2c) log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id) log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold)) v = v[:,w>mydf.linear_dep_threshold].T.conj() v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1) j2c = v j2ctag = 'eig' naux0 = j2c.shape[0] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 vbar = mydf.auxbar(fused_cell) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt): G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1]) if SI_on_z.size > 0: for k, aoao in enumerate(dat): aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k]) aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T) vG_mod = numpy.einsum('ig,g,g->i', aux.conj(), wcoulG[p0:p1][G0idx], SI_on_z) if gamma_point(adapted_kptjs[k]): j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k] else: tmp = vG_mod[:,None] * ovlp[k] j3cR[k][:naux] -= tmp.real j3cI[k][:naux] -= tmp.imag tmp = aux = vG_mod nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d/%d'%(ji,istep)] = v with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]) if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji])
def make_kpt(uniq_kptji_id, cholesky_j2c): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) wcoulG = mydf.weighted_coulG(kpt, False, mesh) Gaux *= wcoulG.reshape(-1, 1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngrids, 16384) def load(aux_slice): col0, col1 = aux_slice j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.vstack([ fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T for i in range(nsegs) ]) # vbar is the interaction between the background charge # and the auxiliary basis. 0D, 1D, 2D do not have vbar. if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None return j3cR, j3cI pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128) cols = [sh_range[2] for sh_range in shranges] locs = numpy.append(0, numpy.cumsum(cols)) tasks = zip(locs[:-1], locs[1:]) for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)): bstart, bend, ncol = shranges[istep] log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1, len(shranges), bstart, bend, ncol) if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d' % (ji, istep)] = v else: feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v) j3cR = j3cI = None for ji in adapted_ji_idx: del (fswap['j3c-junk/%d' % ji])
def make_kpt(uniq_kptji_id): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) shls_slice = (auxcell.nbas, fused_cell.nbas) Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt) Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1) kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id]) try: j2c = scipy.linalg.cholesky(j2c, lower=True) j2ctag = 'CD' except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that gs is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg])) w, v = scipy.linalg.eigh(j2c) log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id) log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0], numpy.count_nonzero(w < mydf.linear_dep_threshold)) v = v[:, w > mydf.linear_dep_threshold].T.conj() v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1) j2c = v j2ctag = 'eig' naux0 = j2c.shape[0] if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao * (nao + 1) // 2 vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs) for k, ji in enumerate(adapted_ji_idx): ovlp[k] = lib.pack_tril(ovlp[k]) else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min( max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max( 16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1))) else: Gblksize = max( 16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1))) Gblksize = min(Gblksize, ngs, 16384) pqkRbuf = numpy.empty(buflen * Gblksize) pqkIbuf = numpy.empty(buflen * Gblksize) # buf for ft_aopair buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128) col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1 + ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]) if is_zero(kpt): for i, c in enumerate(vbar): if c != 0: v[i] -= c * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngs, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) else: v = lib.dot(j2c, v) feri['j3c/%d' % ji][:naux0, col0:col1] = v del (feri['j2c/%d' % uniq_kptji_id]) for k, ji in enumerate(adapted_ji_idx): v = feri['j3c/%d' % ji][:naux0] del (feri['j3c/%d' % ji]) feri['j3c/%d' % ji] = v
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file): log = logger.Logger(mydf.stdout, mydf.verbose) t1 = t0 = (time.clock(), time.time()) fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell) ao_loc = cell.ao_loc_nr() nao = ao_loc[-1] naux = auxcell.nao_nr() nkptij = len(kptij_lst) mesh = mydf.mesh Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] kptis = kptij_lst[:, 0] kptjs = kptij_lst[:, 1] kpt_ji = kptjs - kptis uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji) log.debug('Num uniq kpts %d', len(uniq_kpts)) log.debug2('uniq_kpts %s', uniq_kpts) # j2c ~ (-kpt_ji | kpt_ji) j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts) j2ctags = [] t1 = log.timer_debug1('2c2e', *t1) swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) # Unlink swapfile to avoid trash swapfile = None mem_now = max(comm.allgather(lib.current_memory()[0])) max_memory = max(2000, mydf.max_memory - mem_now) blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr())) log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize) for k, kpt in enumerate(uniq_kpts): coulG = mydf.weighted_coulG(kpt, False, mesh) j2c_k = numpy.zeros_like(j2c[k]) for p0, p1 in mydf.prange(0, ngrids, blksize): aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T LkR = numpy.asarray(aoaux.real, order='C') LkI = numpy.asarray(aoaux.imag, order='C') aoaux = None if is_zero(kpt): # kpti == kptj j2c_k[naux:] += lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T) j2c_k[naux:] += lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T) else: j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1], LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T) j2c_k[naux:] += j2cR + j2cI * 1j kLR = kLI = None j2c_k[:naux, naux:] = j2c_k[naux:, :naux].conj().T j2c[k] -= mpi.allreduce(j2c_k) j2c[k] = fuse(fuse(j2c[k]).T).T try: fswap['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True) j2ctags.append('CD') except scipy.linalg.LinAlgError as e: #msg =('===================================\n' # 'J-metric not positive definite.\n' # 'It is likely that mesh is not enough.\n' # '===================================') #log.error(msg) #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg])) w, v = scipy.linalg.eigh(j2c[k]) log.debug2('metric linear dependency for kpt %s', k) log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1], numpy.count_nonzero(w < mydf.linear_dep_threshold)) v1 = v[:, w > mydf.linear_dep_threshold].T.conj() v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1) fswap['j2c/%d' % k] = v1 if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': idx = numpy.where(w < -mydf.linear_dep_threshold)[0] if len(idx) > 0: fswap['j2c-/%d' % k] = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T w = v = v1 = None j2ctags.append('eig') j2c = coulG = None aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9 j_only = numpy.all(aosym_s2) if gamma_point(kptij_lst): dtype = 'f8' else: dtype = 'c16' t1 = log.timer_debug1('aoaux and int2c', *t1) # Estimates the buffer size based on the last contraction in G-space. # This contraction requires to hold nkptj copies of (naux,?) array # simultaneously in memory. mem_now = max(comm.allgather(lib.current_memory()[0])) max_memory = max(2000, mydf.max_memory - mem_now) nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse)) buflen = max( int( min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao, nao / 3 / mpi.pool.size)), 1) chunks = (buflen, nao) j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only) log.debug1('max_memory = %d MB (%d in use) chunks %s', max_memory, mem_now, chunks) log.debug2('j3c_jobs %s', j3c_jobs) if j_only: int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst) else: int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst) idxb = numpy.tril_indices(nao) idxb = (idxb[0] * nao + idxb[1]).astype('i') aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e') def gen_int3c(job_id, ish0, ish1): dataname = 'j3c-chunks/%d' % job_id i0 = ao_loc[ish0] i1 = ao_loc[ish1] dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2 if j_only: dij = dii buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii))) else: dij = (i1 - i0) * nao buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij))) auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen) buflen = max([x[2] for x in auxranges]) buf = numpy.empty(nkptij * dij * buflen, dtype=dtype) buf1 = numpy.empty(dij * buflen, dtype=dtype) naux = aux_loc[-1] for kpt_id, kptij in enumerate(kptij_lst): key = '%s/%d' % (dataname, kpt_id) if aosym_s2[kpt_id]: shape = (naux, dii) else: shape = (naux, dij) if gamma_point(kptij): fswap.create_dataset(key, shape, 'f8') else: fswap.create_dataset(key, shape, 'c16') naux0 = 0 for istep, auxrange in enumerate(auxranges): log.alldebug2("aux_e1 job_id %d step %d", job_id, istep) sh0, sh1, nrow = auxrange sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1) mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype, buffer=buf) mat = int3c(sub_slice, mat) for k, kptij in enumerate(kptij_lst): h5dat = fswap['%s/%d' % (dataname, k)] v = lib.transpose(mat[k], out=buf1) if not j_only and aosym_s2[k]: idy = idxb[i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2] - i0 * nao out = numpy.ndarray((nrow, dii), dtype=v.dtype, buffer=mat[k]) v = numpy.take(v, idy, axis=1, out=out) if gamma_point(kptij): h5dat[naux0:naux0 + nrow] = v.real else: h5dat[naux0:naux0 + nrow] = v naux0 += nrow def ft_fuse(job_id, uniq_kptji_id, sh0, sh1): kpt = uniq_kpts[uniq_kptji_id] # kpt = kptj - kpti adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id]) j2ctag = j2ctags[uniq_kptji_id] naux0 = j2c.shape[0] if ('j2c-/%d' % uniq_kptji_id) in fswap: j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id]) else: j2c_negative = None if is_zero(kpt): aosym = 's2' else: aosym = 's1' if aosym == 's2' and cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] j3cR = [None] * nkptj j3cI = [None] * nkptj i0 = ao_loc[sh0] i1 = ao_loc[sh1] for k, idx in enumerate(adapted_ji_idx): key = 'j3c-chunks/%d/%d' % (job_id, idx) v = numpy.asarray(fswap[key]) if aosym == 's2' and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel() j3cR[k] = numpy.asarray(v.real, order='C') if v.dtype == numpy.complex128: j3cI[k] = numpy.asarray(v.imag, order='C') v = None ncol = j3cR[0].shape[1] Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1))) # +1 for pqkRbuf/pqkIbuf Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(ncol * Gblksize) pqkIbuf = numpy.empty(ncol * Gblksize) buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128) log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol) wcoulG = mydf.weighted_coulG(kpt, False, mesh) fused_cell_slice = (auxcell.nbas, fused_cell.nbas) if aosym == 's2': shls_slice = (sh0, sh1, 0, sh1) else: shls_slice = (sh0, sh1, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b, gxyz[p0:p1], Gvbase, kpt) Gaux *= wcoulG[p0:p1, None] kLR = Gaux.real.copy('C') kLI = Gaux.imag.copy('C') Gaux = None dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG, ncol) pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1) lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1) lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1) kLR = kLI = None for k, idx in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = fuse(j3cR[k]) else: v = fuse(j3cR[k] + j3cI[k] * 1j) if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v else: fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot( j2c, v) # low-dimension systems if j2c_negative is not None: fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v) _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)
def make_kpt(uniq_kptji_id, cholesky_j2c): # kpt = kptj - kpti kpt = uniq_kpts[uniq_kptji_id] log.debug1('kpt = %s', kpt) adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0] adapted_kptjs = kptjs[adapted_ji_idx] nkptj = len(adapted_kptjs) log.debug1('adapted_ji_idx = %s', adapted_ji_idx) j2c, j2c_negative, j2ctag = cholesky_j2c Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T Gaux = fuse(Gaux) Gaux *= mydf.weighted_coulG(kpt, False, mesh) kLR = Gaux.T.real.copy('C') kLI = Gaux.T.imag.copy('C') if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 if cell.dimension == 3: vbar = fuse(mydf.auxbar(fused_cell)) ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs) ovlp = [lib.pack_tril(s) for s in ovlp] else: aosym = 's1' nao_pair = nao**2 mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, mydf.max_memory-mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair) shranges = _guess_shell_ranges(cell, buflen, aosym) buflen = max([x[2] for x in shranges]) # +1 for a pqkbuf if aosym == 's2': Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1))) else: Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 16384) pqkRbuf = numpy.empty(buflen*Gblksize) pqkIbuf = numpy.empty(buflen*Gblksize) # buf for ft_aopair buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128) def pw_contract(istep, sh_range, j3cR, j3cI): bstart, bend, ncol = sh_range if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) for p0, p1 in lib.prange(0, ngrids, Gblksize): dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b, gxyz[p0:p1], Gvbase, kpt, adapted_kptjs, out=buf) nG = p1 - p0 for k, ji in enumerate(adapted_ji_idx): aoao = dat[k].reshape(nG,ncol) pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf) pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf) pqkR[:] = aoao.real.T pqkI[:] = aoao.imag.T lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1) lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1) if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])): lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1) lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1) for k, ji in enumerate(adapted_ji_idx): if is_zero(kpt) and gamma_point(adapted_kptjs[k]): v = j3cR[k] else: v = j3cR[k] + j3cI[k] * 1j if j2ctag == 'CD': v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True) feri['j3c/%d/%d'%(ji,istep)] = v else: feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v) # low-dimension systems if j2c_negative is not None: feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v) with lib.call_in_background(pw_contract) as compute: col1 = 0 for istep, sh_range in enumerate(shranges): log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \ istep+1, len(shranges), *sh_range) bstart, bend, ncol = sh_range col0, col1 = col1, col1+ncol j3cR = [] j3cI = [] for k, idx in enumerate(adapted_ji_idx): v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)] v = fuse(numpy.vstack(v)) if is_zero(kpt) and cell.dimension == 3: for i in numpy.where(vbar != 0)[0]: v[i] -= vbar[i] * ovlp[k][col0:col1] j3cR.append(numpy.asarray(v.real, order='C')) if is_zero(kpt) and gamma_point(adapted_kptjs[k]): j3cI.append(None) else: j3cI.append(numpy.asarray(v.imag, order='C')) v = None compute(istep, sh_range, j3cR, j3cI) for ji in adapted_ji_idx: del(fswap['j3c-junk/%d'%ji])