def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    Every rank calls df_jk.get_jk with the same arguments, except that
    exxdiv is forwarded only on rank 0. The requested matrices are then
    passed through mpi.reduce (J first, then K).

    Returns:
        (vj, vk) as produced by df_jk.get_jk, each reduced only when the
        corresponding with_j / with_k flag is true.
    '''
    # to apply df_jk._ewald_exxdiv_for_G0 function once
    exxdiv_on_rank = exxdiv if rank == 0 else None

    vj, vk = df_jk.get_jk(mydf, dm, hermi, kpt, kpt_band,
                          with_j, with_k, exxdiv_on_rank)

    # The order of the collectives (J before K) must match on all ranks.
    vj = mpi.reduce(vj) if with_j else vj
    vk = mpi.reduce(vk) if with_k else vk
    return vj, vk
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    Every rank calls mdf_jk.get_jk with the same arguments, except that
    exxdiv is replaced by None on all ranks other than 0. The requested
    matrices are then passed through mpi.reduce (J first, then K).

    Returns:
        (vj, vk) as produced by mdf_jk.get_jk, each reduced only when the
        corresponding with_j / with_k flag is true.
    '''
    # Only rank 0 keeps exxdiv (presumably so that the exxdiv correction
    # is not added once per rank before the reduction -- TODO confirm).
    exxdiv_on_rank = exxdiv if rank == 0 else None

    vj, vk = mdf_jk.get_jk(mydf, dm, hermi, kpt, kpt_band,
                           with_j, with_k, exxdiv_on_rank)

    # The order of the collectives (J before K) must match on all ranks.
    vj = mpi.reduce(vj) if with_j else vj
    vk = mpi.reduce(vk) if with_k else vk
    return vj, vk
def get_nuc(mydf, kpts):
    '''Nuclear-attraction matrices accumulated over this rank's grid blocks.

    The potential of the (negative) atomic charges is built in reciprocal
    space, transformed to the real-space mesh with tools.ifft, and
    contracted with the AO values yielded by mydf.mpi_aoR_loop. The
    per-rank partial matrices are combined with mpi.reduce.

    Args:
        mydf: density-fitting object; synchronised through _sync_mydf.
        kpts: None (a single zero k-point is used), one k-point of shape
            (3,), or anything reshapeable to (-1, 3).

    Returns:
        The value returned by mpi.reduce. On rank 0 the leading k-point
        axis is dropped when kpts is None or has shape (3,).
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))

    if abs(kpts_lst).sum() < 1e-9:  # gamma_point
        dtype = numpy.float64
    else:
        dtype = numpy.complex128

    mesh = mydf.mesh
    charge = -cell.atom_charges()
    Gv = cell.get_Gv(mesh)
    SI = cell.get_SI(Gv)
    rhoG = numpy.dot(charge, SI)

    coulG = tools.get_coulG(cell, mesh=mesh, Gv=Gv)
    vneG = rhoG * coulG
    vneR = tools.ifft(vneG, mydf.mesh).real

    # Pre-allocate the accumulator with its final shape and dtype. A list
    # of integer zeros would leave a rank that receives no grid block with
    # an int vector, which does not match the buffers of the other ranks
    # in the reduction below.
    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    vne = numpy.zeros((nkpts, nao, nao), dtype=dtype)
    for ao_ks_etc, p0, p1 in mydf.mpi_aoR_loop(mydf.grids, kpts_lst):
        ao_ks = ao_ks_etc[0]
        for k, ao in enumerate(ao_ks):
            vne[k] += lib.dot(ao.T.conj() * vneR[p0:p1], ao)
        # Drop the references to the AO block before the next one is built.
        ao = ao_ks = None
    vne = mpi.reduce(lib.asarray(vne))

    if rank == 0:
        if kpts is None or numpy.shape(kpts) == (3,):
            vne = vne[0]
    return vne
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Exchange matrices from mdf_jk.get_k_kpts, combined with mpi.reduce.

    exxdiv is forwarded only on rank 0; every other rank passes None.
    '''
    # to apply df_jk._ewald_exxdiv_for_G0 function once
    exxdiv_on_rank = exxdiv if rank == 0 else None
    vk_local = mdf_jk.get_k_kpts(mydf, dm_kpts, hermi, kpts, kpts_band,
                                 exxdiv_on_rank)
    return mpi.reduce(vk_local)
def ecp_int(cell, kpts=None):
    '''ECP scalar integrals for the given k-points, distributed over ranks.

    Rank 0 broadcasts the serialised cell and the other ranks rebuild it.
    Shell pairs (i, j) with j <= i are handed out by
    mpi.work_stealing_partition, the partial matrices are combined with
    mpi.reduce, and rank 0 rebuilds full matrices from the lower triangle.

    Note: cell._env is modified in place (AS_ECPBAS_OFFSET, AS_NECPBAS).

    Returns:
        On rank 0 a list with one matrix per k-point, or a single matrix
        when kpts is None or has shape (3,). On other ranks the local,
        unreduced array is returned.
    '''
    if rank == 0:
        comm.bcast(cell.dumps())
    else:
        cell = pgto.loads(comm.bcast(None))

    kpts_lst = (numpy.zeros((1,3)) if kpts is None
                else numpy.reshape(kpts, (-1,3)))

    # append a fictitious s function to mimic the auxiliary index in pbc.incore.
    # ptr2last_env_idx to force PBCnr3c_fill_* function to copy the entire "env"
    ptr2last_env_idx = len(cell._env) - 1
    fake_s_shell = [0, 0, 1, 1, 0, ptr2last_env_idx, 0, 0]

    ecpcell = gto.Mole()
    ecpcell._atm = cell._atm
    ecpcell._bas = numpy.vstack([fake_s_shell,
                                 cell._ecpbas]).astype(numpy.int32)
    ecpcell._env = cell._env
    # In pbc.incore _ecpbas is appended to two sets of cell._bas and the
    # fictitious s function.
    cell._env[AS_ECPBAS_OFFSET] = cell.nbas * 2 + 1
    cell._env[AS_NECPBAS] = len(cell._ecpbas)

    nkpts = len(kpts_lst)
    kptij_lst = numpy.hstack((kpts_lst, kpts_lst)).reshape(-1,2,3)
    at_gamma = abs(kpts_lst).sum() < 1e-9
    dtype = numpy.double if at_gamma else numpy.complex128

    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    mat = numpy.zeros((nkpts,nao,nao), dtype=dtype)

    intor = cell._add_suffix('ECPscalar')
    int3c = incore.wrap_int3c(cell, ecpcell, intor, kptij_lst=kptij_lst)

    # shls_slice of auxiliary index (0,1) corresponds to the fictitious s function
    tasks = [(ish, ish+1, jsh, jsh+1, 0, 1)  # shls_slice
             for ish in range(cell.nbas) for jsh in range(ish+1)]
    for shls_slice in mpi.work_stealing_partition(tasks):
        ish0, ish1, jsh0, jsh1 = shls_slice[:4]
        i0, i1 = ao_loc[ish0], ao_loc[ish1]
        j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
        block = numpy.empty((nkpts,i1-i0,j1-j0), dtype=dtype)
        mat[:,i0:i1,j0:j1] = int3c(shls_slice, block)

    reduced = mpi.reduce(mat)
    if rank == 0:
        mat = []
        for k, kpt in enumerate(kpts_lst):
            # Only the lower triangle was computed; rebuild the full matrix.
            full = lib.unpack_tril(lib.pack_tril(reduced[k]), lib.HERMITIAN)
            if abs(kpt).sum() < 1e-9:  # gamma_point:
                full = full.real
            mat.append(full)
        if kpts is None or numpy.shape(kpts) == (3,):
            mat = mat[0]
    return mat
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    Every rank calls aft_jk.get_jk with the same arguments, except that
    exxdiv is forwarded only on rank 0. The requested matrices are then
    passed through mpi.reduce (J first, then K).

    Returns:
        (vj, vk) as produced by aft_jk.get_jk, each reduced only when the
        corresponding with_j / with_k flag is true.
    '''
    # to apply df_jk._ewald_exxdiv_for_G0 function once
    exxdiv_on_rank = exxdiv if rank == 0 else None

    vj, vk = aft_jk.get_jk(mydf, dm, hermi, kpt, kpt_band,
                           with_j, with_k, exxdiv_on_rank)

    # The order of the collectives (J before K) must match on all ranks.
    vj = mpi.reduce(vj) if with_j else vj
    vk = mpi.reduce(vk) if with_k else vk
    return vj, vk
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None):
    '''Coulomb matrices on the real-space grid, distributed over ranks.

    Steps:
      1. accumulate the density of every density-matrix set on the grid
         blocks yielded by mydf.mpi_aoR_loop, then mpi.allreduce it;
      2. for each set: scale by 1/nkpts, tools.fft, multiply by coulG,
         tools.ifft and keep the real part;
      3. contract the potential (times cell.vol / ngrids) with the AOs at
         kpts_band and combine the partial matrices with mpi.reduce.

    Returns:
        The result of _format_jks applied to the reduced matrices.
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    mesh = mydf.mesh

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    coulG = tools.get_coulG(cell, mesh=mesh)
    ngrids = len(coulG)

    # Step 1: density on the local grid blocks, summed over all ranks.
    rhoR = numpy.zeros((nset,ngrids))
    for ao_ks_etc, p0, p1 in mydf.mpi_aoR_loop(mydf.grids, kpts):
        ao_ks = ao_ks_etc[0]
        for k, ao in enumerate(ao_ks):
            for iset in range(nset):
                rhoR[iset,p0:p1] += numint.eval_rho(cell, ao, dms[iset,k])
        ao = ao_ks = None
    rhoR = mpi.allreduce(rhoR)

    # Step 2: one potential per set; every row of vR is written below.
    vR = numpy.zeros((nset,ngrids))
    for iset in range(nset):
        rhoR[iset] *= 1./nkpts
        rhoG = tools.fft(rhoR[iset], mesh)
        vG = coulG * rhoG
        vR[iset] = tools.ifft(vG, mesh).real

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    vR *= cell.vol / ngrids

    # Step 3: contract with the AOs at the band k-points.
    band_at_gamma = gamma_point(kpts_band)
    if band_at_gamma:
        vj_kpts = numpy.zeros((nset,nband,nao,nao))
    else:
        vj_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128)

    for ao_ks_etc, p0, p1 in mydf.mpi_aoR_loop(mydf.grids, kpts_band):
        ao_ks = ao_ks_etc[0]
        for k, ao in enumerate(ao_ks):
            for iset in range(nset):
                vj_kpts[iset,k] += lib.dot(ao.T.conj()*vR[iset,p0:p1], ao)

    vj_kpts = mpi.reduce(vj_kpts)
    if band_at_gamma:
        vj_kpts = vj_kpts.real
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_pp(mydf, kpts=None):
    '''Pseudopotential matrices for the given k-points.

    The first local part comes from aft.get_pp_loc_part1 and is combined
    with mpi.reduce. Rank 0 then adds the second local part and the
    non-local part from pseudo.pp_int.

    Returns:
        The reduced array. On rank 0 the leading k-point axis is dropped
        when kpts is None or has shape (3,).
    '''
    kpts_lst = (numpy.zeros((1,3)) if kpts is None
                else numpy.reshape(kpts, (-1,3)))

    mydf = _sync_mydf(mydf)
    loc_part1 = aft.get_pp_loc_part1(mydf, kpts_lst)
    vpp = mpi.reduce(lib.asarray(loc_part1))

    if rank == 0:
        cell = mydf.cell
        vloc2 = pseudo.pp_int.get_pp_loc_part2(cell, kpts_lst)
        vppnl = pseudo.pp_int.get_pp_nl(cell, kpts_lst)
        for k, _ in enumerate(kpts_lst):
            # Cast to the dtype of the reduced array before accumulating.
            vpp[k] += numpy.asarray(vppnl[k] + vloc2[k], dtype=vpp.dtype)

        if kpts is None or numpy.shape(kpts) == (3,):
            vpp = vpp[0]
    return vpp
def get_pp(mydf, kpts=None):
    '''Pseudopotential matrices for the given k-points.

    aft.get_pp_loc_part1 supplies the first local contribution, which is
    passed through mpi.reduce. The remaining contributions
    (pseudo.pp_int.get_pp_loc_part2 and pseudo.pp_int.get_pp_nl) are
    evaluated and added on rank 0 only.

    Returns:
        The reduced array. On rank 0 the leading k-point axis is dropped
        when kpts is None or has shape (3,).
    '''
    if kpts is not None:
        kpts_lst = numpy.reshape(kpts, (-1, 3))
    else:
        kpts_lst = numpy.zeros((1, 3))

    mydf = _sync_mydf(mydf)
    vpp = mpi.reduce(lib.asarray(aft.get_pp_loc_part1(mydf, kpts_lst)))

    if rank == 0:
        vloc2 = pseudo.pp_int.get_pp_loc_part2(mydf.cell, kpts_lst)
        vppnl = pseudo.pp_int.get_pp_nl(mydf.cell, kpts_lst)
        nkpts = len(kpts_lst)
        for k in range(nkpts):
            correction = vppnl[k] + vloc2[k]
            vpp[k] += numpy.asarray(correction, dtype=vpp.dtype)

        if kpts is None or numpy.shape(kpts) == (3,):
            vpp = vpp[0]
    return vpp
def get_veff(mf, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1):
    '''Effective potential built with ni.nr_uks and mf.get_j / mf.get_jk.

    The state of mf is first synchronised by broadcasting mf.pack(). The
    mol argument is ignored; mf.mol is always used. dm, dm_last and
    vhf_last are broadcast with mpi.bcast_tagged_array whenever any rank
    holds the mpi.Message.SkippedArg placeholder for them.

    Raises:
        NotImplementedError: if mf.nlc is not the empty string.

    Returns:
        vxc tagged (lib.tag_array) with ecoul, exc, vj and vk. ecoul is
        None unless dm is a 3-D array whose first dimension is 2.
    '''
    t0 = (logger.process_clock(), logger.perf_counter())
    # Make every rank use the settings packed on the broadcasting rank.
    mf.unpack_(comm.bcast(mf.pack()))
    mol = mf.mol
    ni = mf._numint

    if mf.nlc != '':
        raise NotImplementedError
    omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin)

    # Broadcast the large input arrays here.
    if any(comm.allgather(dm is mpi.Message.SkippedArg)):
        if rank == 0 and dm is None:
            dm = mf.make_rdm1()
        dm = mpi.bcast_tagged_array(dm)

    if any(comm.allgather(dm_last is mpi.Message.SkippedArg)):
        dm_last = mpi.bcast_tagged_array(dm_last)

    if any(comm.allgather(vhf_last is mpi.Message.SkippedArg)):
        vhf_last = mpi.bcast_tagged_array(vhf_last)

    # Energies are only evaluated for a single pair of spin density matrices.
    ground_state = (dm.ndim == 3 and dm.shape[0] == 2)

    if mf.grids.coords is None:
        mpi_rks._setup_grids_(mf, dm[0]+dm[1])
        t0 = logger.timer(mf, 'setting up grids', *t0)

    if hermi == 2:  # because rho = 0
        n, exc, vxc = 0, 0, 0
    else:
        n, exc, vxc = ni.nr_uks(mol, mf.grids, mf.xc, dm)
        # n and exc are made available on every rank; vxc goes through
        # mpi.reduce.
        n = comm.allreduce(n)
        exc = comm.allreduce(exc)
        vxc = mpi.reduce(vxc)
        logger.debug(mf, 'nelec by numeric integration = %s', n)
        t0 = logger.timer(mf, 'vxc', *t0)

    if abs(hyb) < 1e-10 and abs(alpha) < 1e-10:
        # No exchange coefficient: only the Coulomb matrix is needed.
        vk = None
        if getattr(vhf_last, 'vj', None) is not None:
            # Incremental build from the change in the density matrix.
            ddm = numpy.asarray(dm) - dm_last
            ddm = ddm[0] + ddm[1]
            vj = mf.get_j(mol, ddm, hermi)
            vj += vhf_last.vj
        else:
            vj = mf.get_j(mol, dm[0]+dm[1], hermi)
        vxc += vj
    else:
        if getattr(vhf_last, 'vk', None) is not None:
            # Incremental build from the change in the density matrix.
            ddm = numpy.asarray(dm) - dm_last
            vj, vk = mf.get_jk(mol, ddm, hermi)
            vj = vj[0] + vj[1]
            vk *= hyb
            if abs(omega) > 1e-10:
                # Extra exchange term evaluated with the omega keyword.
                vklr = mf.get_k(mol, ddm, hermi, omega=omega)
                vk += vklr * (alpha - hyb)
            ddm = None
            vj += vhf_last.vj
            vk += vhf_last.vk
        else:
            vj, vk = mf.get_jk(mol, dm, hermi)
            vj = vj[0] + vj[1]
            vk *= hyb
            if abs(omega) > 1e-10:
                # Extra exchange term evaluated with the omega keyword.
                vklr = mf.get_k(mol, dm, hermi, omega=omega)
                vk += vklr * (alpha - hyb)
        vxc += vj
        vxc -= vk

        if ground_state:
            exc -=(numpy.einsum('ij,ji', dm[0], vk[0]) +
                   numpy.einsum('ij,ji', dm[1], vk[1])) * .5

    if ground_state:
        ecoul = numpy.einsum('ij,ji', dm[0]+dm[1], vj) * .5
    else:
        ecoul = None

    vxc = lib.tag_array(vxc, ecoul=ecoul, exc=exc, vj=vj, vk=vk)
    return vxc
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Exchange matrices on the real-space grid, distributed over ranks.

    The (band k-point, k-point) pairs are split between ranks with
    mpi.static_partition. For each pair the AO pair densities are
    transformed with tools.fft, multiplied by the Coulomb kernel from
    tools.get_coulG, transformed back and contracted with the density
    matrices (or with the occupied orbitals when dm_kpts carries mo_coeff
    and mo_occ). The partial results are combined with mpi.reduce.

    Args:
        mydf: density-fitting object; synchronised through _sync_mydf.
            Its exxdiv attribute is overwritten with the exxdiv argument
            while pairs are processed.
        dm_kpts: density matrices, optionally tagged with mo_coeff/mo_occ.
        hermi: not used by this function.
        kpts: k-points of the density matrices.
        kpts_band: optional k-points at which the matrices are evaluated.
        exxdiv: with 'ewald', the G=0 correction is added on rank 0 after
            the reduction.

    Returns:
        The result of _format_jks applied to the reduced matrices.
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if hasattr(dm_kpts, 'mo_coeff'):
        if dm_kpts.ndim == 3:  # KRHF
            mo_coeff = [dm_kpts.mo_coeff]
            mo_occ = [dm_kpts.mo_occ]
        else:  # KUHF
            mo_coeff = dm_kpts.mo_coeff
            mo_occ = dm_kpts.mo_occ
    elif hasattr(dm_kpts[0], 'mo_coeff'):
        mo_coeff = [dm.mo_coeff for dm in dm_kpts]
        mo_occ = [dm.mo_occ for dm in dm_kpts]
    else:
        mo_coeff = None

    kpts = numpy.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1./nkpts * (cell.vol/ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=dms.dtype)
    else:
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128)

    coords = mydf.grids.coords
    # AO values stored transposed and C-contiguous.
    ao2_kpts = [numpy.asarray(ao.T, order='C')
                for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [numpy.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)]

    mem_now = lib.current_memory()[0]
    # max_memory is already the memory left after the current usage, so it
    # must not have mem_now subtracted a second time when sizing the blocks.
    max_memory = mydf.max_memory - mem_now
    blksize = int(min(nao, max(1, max_memory*1e6/16/4/ngrids/nao)))
    lib.logger.debug1(mydf, 'max_memory %s blksize %d', max_memory, blksize)
    vR_dm = numpy.empty((nset,nao,ngrids), dtype=vk_kpts.dtype)

    # Cache of the contracted quantities, one entry per k2.
    ao_dms_buf = [None] * nkpts
    tasks = [(k1,k2) for k2 in range(nkpts) for k1 in range(nband)]
    for k1, k2 in mpi.static_partition(tasks):
        ao1T = ao1_kpts[k1]
        ao2T = ao2_kpts[k2]
        kpt1 = kpts_band[k1]
        kpt2 = kpts[k2]
        if ao2T.size == 0 or ao1T.size == 0:
            continue

        # If we have an ewald exxdiv, we add the G=0 correction near the
        # end of the function to bypass any discretization errors
        # that arise from the FFT.
        mydf.exxdiv = exxdiv
        if exxdiv == 'ewald' or exxdiv is None:
            coulG = tools.get_coulG(cell, kpt2-kpt1, False, mydf, mesh)
        else:
            coulG = tools.get_coulG(cell, kpt2-kpt1, True, mydf, mesh)
        if is_zero(kpt1-kpt2):
            expmikr = numpy.array(1.)
        else:
            expmikr = numpy.exp(-1j * numpy.dot(coords, kpt2-kpt1))

        if ao_dms_buf[k2] is None:
            if mo_coeff is None:
                ao_dms = [lib.dot(dm[k2], ao2T.conj()) for dm in dms]
            else:
                ao_dms = []
                for i, dm in enumerate(dms):
                    occ = mo_occ[i][k2]
                    # Occupied orbitals scaled by sqrt(occupation).
                    mo_scaled = mo_coeff[i][k2][:,occ>0] * numpy.sqrt(occ[occ>0])
                    ao_dms.append(lib.dot(mo_scaled.T, ao2T).conj())
            ao_dms_buf[k2] = ao_dms
        else:
            ao_dms = ao_dms_buf[k2]

        if mo_coeff is None:
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = numpy.einsum('ig,jg->ijg', ao1T[p0:p1].conj()*expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1-p0,nao,ngrids)
                vG = None
                if vR_dm.dtype == numpy.double:
                    vR = vR.real
                for i in range(nset):
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                vR = None
        else:
            for p0, p1 in lib.prange(0, nao, blksize):
                for i in range(nset):
                    rho1 = numpy.einsum('ig,jg->ijg',
                                        ao1T[p0:p1].conj()*expmikr,
                                        ao_dms[i].conj())
                    vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                    rho1 = None
                    vG *= coulG
                    vR = tools.ifft(vG, mesh).reshape(p1-p0,-1,ngrids)
                    vG = None
                    if vR_dm.dtype == numpy.double:
                        vR = vR.real
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                    vR = None
        vR_dm *= expmikr.conj()

        for i in range(nset):
            vk_kpts[i,k1] += weight * lib.dot(vR_dm[i], ao1T.T)

    vk_kpts = mpi.reduce(lib.asarray(vk_kpts))
    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = vk_kpts.real

    if rank == 0:
        # Added once, after the reduction, on the root rank only.
        if exxdiv == 'ewald':
            _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)
    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_pp_loc_part1(mydf, kpts=None):
    '''Run aft.get_pp_loc_part1 on the synchronised mydf and mpi.reduce it.'''
    synced_df = _sync_mydf(mydf)
    local_part = aft.get_pp_loc_part1(synced_df, kpts)
    return mpi.reduce(local_part)
def get_nuc(mydf, kpts=None):
    '''Run aft.get_nuc on the synchronised mydf and mpi.reduce the result.'''
    synced_df = _sync_mydf(mydf)
    # Call the serial code because pw_loop and ft_loop methods are overloaded.
    local_vne = aft.get_nuc(synced_df, kpts)
    return mpi.reduce(local_vne)
def get_pp_loc_part1(mydf, kpts=None):
    '''Evaluate aft.get_pp_loc_part1 per rank and combine with mpi.reduce.

    mydf is first passed through _sync_mydf.
    '''
    return mpi.reduce(aft.get_pp_loc_part1(_sync_mydf(mydf), kpts))
def get_veff(mf, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1):
    '''Effective potential built with ni.nr_rks and mf.get_j / mf.get_jk.

    The state of mf is first synchronised by broadcasting mf.pack(). The
    mol argument is ignored; mf.mol is always used. dm, dm_last and
    vhf_last are broadcast with mpi.bcast_tagged_array whenever any rank
    holds the 'SKIPPED_ARG' placeholder string for them. The J and K
    matrices returned by mf are combined with mpi.reduce.

    Raises:
        NotImplementedError: if mf.nlc is not the empty string, or if the
            functional has a non-zero range-separation parameter omega.

    Returns:
        vxc tagged (lib.tag_array) with ecoul, exc, vj and vk. ecoul is
        None unless dm is a 2-D numpy array.
    '''
    # time.clock() was removed in Python 3.8. Use the clock helpers of the
    # logger when it provides them; keep the legacy clocks only as a
    # fallback for loggers that predate those helpers.
    if hasattr(logger, 'process_clock'):
        t0 = (logger.process_clock(), logger.perf_counter())
    else:
        t0 = (time.clock(), time.time())

    # Make every rank use the settings packed on the broadcasting rank.
    mf.unpack_(comm.bcast(mf.pack()))
    mol = mf.mol
    ni = mf._numint

    if mf.nlc != '':
        raise NotImplementedError
    omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin)
    if abs(omega) > 1e-10:  # For range separated Coulomb operator
        raise NotImplementedError

    # Broadcast the large input arrays here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        if rank == 0 and dm is None:
            dm = mf.make_rdm1()
        dm = mpi.bcast_tagged_array(dm)

    if any(comm.allgather(isinstance(dm_last, str) and dm_last == 'SKIPPED_ARG')):
        dm_last = mpi.bcast_tagged_array(dm_last)

    if any(comm.allgather(isinstance(vhf_last, str) and vhf_last == 'SKIPPED_ARG')):
        vhf_last = mpi.bcast_tagged_array(vhf_last)

    # Energies are only evaluated for a single 2-D density matrix.
    ground_state = (isinstance(dm, numpy.ndarray) and dm.ndim == 2)

    if mf.grids.coords is None:
        _setup_grids_(mf, dm)
        t0 = logger.timer(mf, 'setting up grids', *t0)

    if hermi == 2:  # because rho = 0
        n, exc, vxc = 0, 0, 0
    else:
        n, exc, vxc = ni.nr_rks(mol, mf.grids, mf.xc, dm)
        # n and exc are made available on every rank; vxc goes through
        # mpi.reduce.
        n = comm.allreduce(n)
        exc = comm.allreduce(exc)
        vxc = mpi.reduce(vxc)
        logger.debug(mf, 'nelec by numeric integration = %s', n)
        t0 = logger.timer(mf, 'vxc', *t0)

    if abs(hyb) < 1e-10 and abs(alpha) < 1e-10:
        # No exchange coefficient: only the Coulomb matrix is needed.
        vk = None
        if getattr(vhf_last, 'vj', None) is not None:
            # Incremental build from the change in the density matrix.
            ddm = numpy.asarray(dm) - dm_last
            vj = mpi.reduce(mf.get_j(mol, ddm, hermi))
            vj += vhf_last.vj
        else:
            vj = mf.get_j(mol, dm, hermi)
            vj = mpi.reduce(vj)
        vxc += vj
    else:
        if getattr(vhf_last, 'vk', None) is not None:
            # Incremental build from the change in the density matrix.
            ddm = numpy.asarray(dm) - dm_last
            vj, vk = mf.get_jk(mol, ddm, hermi)
            ddm = None
            vj = mpi.reduce(vj)
            vk = mpi.reduce(vk) * hyb
            vj += vhf_last.vj
            vk += vhf_last.vk
        else:
            vj, vk = mf.get_jk(mol, dm, hermi)
            vj = mpi.reduce(vj)
            vk = mpi.reduce(vk) * hyb
        vxc += vj - vk * .5

        if ground_state:
            exc -= numpy.einsum('ij,ji', dm, vk) * .5 * .5

    if ground_state:
        ecoul = numpy.einsum('ij,ji', dm, vj) * .5
    else:
        ecoul = None

    vxc = lib.tag_array(vxc, ecoul=ecoul, exc=exc, vj=vj, vk=vk)
    return vxc
def get_nuc(mydf, kpts=None):
    '''Evaluate aft.get_nuc per rank and combine the results with mpi.reduce.

    mydf is first passed through _sync_mydf.
    '''
    # Call the serial code because pw_loop and ft_loop methods are overloaded.
    return mpi.reduce(aft.get_nuc(_sync_mydf(mydf), kpts))
def _reduce_call(module, name, reg_procs, args, kwargs):
    '''Run mpi._distribute_call and pass its result through mpi.reduce.

    All five arguments are forwarded unchanged to mpi._distribute_call.

    Returns:
        The value returned by mpi.reduce for the call's result.
    '''
    # Imported inside the function, as in the original code (presumably to
    # avoid an import cycle at module load time -- TODO confirm).
    from mpi4pyscf.tools import mpi
    result = mpi._distribute_call(module, name, reg_procs, args, kwargs)
    return mpi.reduce(result)
def _eval_jk(mf, dm, hermi, gen_jobs):
    '''Evaluate the contractions described by gen_jobs, split over ranks.

    The basis is split into groups by _partition_bas. gen_jobs(ngroups,
    hermi) returns the list of jobs; element 0 of each job holds group
    ids (flattened into a shell slice) and the remaining elements are
    "recipes", i.e. sequences of 4-index tuples. Indices 0 and 1 of each
    tuple select the density-matrix block and indices 2 and 3 select the
    output block. Jobs are handed out by mpi.work_stealing_partition and
    evaluated with jk.get_jk.

    Returns:
        The result of mpi.reduce on an array of shape
        (n_recipes, n_dm, nao, nao). On rank 0 every matrix is passed
        through lib.hermi_triu when hermi is non-zero; on the other ranks
        the array is zeroed before it is returned.
    '''
    cpu0 = (logger.process_clock(), logger.perf_counter())
    mol = mf.mol
    ao_loc = mol.ao_loc_nr()
    nao = ao_loc[-1]

    bas_groups = _partition_bas(mol)
    jobs = gen_jobs(len(bas_groups), hermi)
    njobs = len(jobs)
    logger.debug1(mf, 'njobs %d', njobs)

    # Each job has multiple recipes.
    n_recipes = len(jobs[0][1:])
    dm = numpy.asarray(dm).reshape(-1, nao, nao)
    n_dm = dm.shape[0]
    vk = numpy.zeros((n_recipes, n_dm, nao, nao))

    if mf.opt is None:
        vhfopt = mf.init_direct_scf(mol)
    else:
        vhfopt = mf.opt
    # Assign the entire dm_cond to vhfopt.
    # The prescreen function CVHFnrs8_prescreen will index q_cond and dm_cond
    # over the entire basis. "set_dm" in function jk.get_jk/direct_bindm only
    # creates a subblock of dm_cond which is not compatible with
    # CVHFnrs8_prescreen.
    vhfopt.set_dm(dm, mol._atm, mol._bas, mol._env)
    # Then skip the "set_dm" initialization in function jk.get_jk/direct_bindm.
    vhfopt._dmcondname = None

    logger.timer_debug1(mf, 'get_jk initialization', *cpu0)
    for job_id in mpi.work_stealing_partition(range(njobs)):
        group_ids = jobs[job_id][0]
        recipes = jobs[job_id][1:]

        shls_slice = lib.flatten([bas_groups[i] for i in group_ids])
        # One (start, stop) AO range for each of the four indices.
        loc = ao_loc[shls_slice].reshape(4, 2)

        # Input blocks, ordered as density matrix -> recipe -> tuple.
        dm_blks = []
        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for i, rec in enumerate(recipe):
                    p0, p1 = loc[rec[0]]
                    q0, q1 = loc[rec[1]]
                    dm_blks.append(dm[i_dm, p0:p1, q0:q1])
        # One contraction script per block, in the same order as dm_blks.
        scripts = [
            'ijkl,%s%s->%s%s' % tuple(['ijkl'[x] for x in rec])
            for recipe in recipes for rec in recipe
        ] * n_dm

        kparts = jk.get_jk(mol, dm_blks, scripts, shls_slice=shls_slice,
                           vhfopt=vhfopt)

        # Scatter the results back, walking kparts in the order used above.
        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for i, rec in enumerate(recipe):
                    p0, p1 = loc[rec[2]]
                    q0, q1 = loc[rec[3]]
                    vk[ir, i_dm, p0:p1, q0:q1] += kparts[i]
                # Pop the results of one recipe
                kparts = kparts[i + 1:]

    vk = mpi.reduce(vk)
    if rank == 0:
        if hermi:
            for i in range(n_recipes):
                for j in range(n_dm):
                    lib.hermi_triu(vk[i, j], hermi, inplace=True)
    else:
        # Zero out vk on workers. If reduce(get_jk()) is called twice,
        # non-zero vk on workers can cause error.
        vk[:] = 0
    logger.timer(mf, 'get_jk', *cpu0)
    return vk
def get_pp(mydf, kpts=None):
    '''Pseudopotential matrices: local part on the grid plus non-local part.

    The local potential is built in reciprocal space (pseudo.get_vlocG and
    the structure factor), transformed to real space and contracted with
    the AO blocks yielded by mydf.mpi_aoR_loop; the partial matrices are
    combined with mpi.reduce. The non-local part is evaluated per k-point
    by the nested vppnl_by_k, with the k-points split between ranks by
    mpi.static_partition and collected with mpi.gather. Rank 0 adds the
    two parts.

    Returns:
        An array with one matrix per k-point, or a single matrix when
        kpts is None or has shape (3,).
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))

    # Real matrices when every k-point is (numerically) zero.
    if abs(kpts_lst).sum() < 1e-9:
        dtype = numpy.float64
    else:
        dtype = numpy.complex128

    mesh = mydf.mesh
    SI = cell.get_SI()
    Gv = cell.get_Gv(mesh)
    vpplocG = pseudo.get_vlocG(cell, Gv)
    vpplocG = -numpy.einsum('ij,ij->j', SI, vpplocG)
    ngrids = len(vpplocG)
    nao = cell.nao_nr()
    nkpts = len(kpts_lst)

    # vpploc evaluated in real-space
    vpplocR = tools.ifft(vpplocG, mesh).real
    vpp = numpy.zeros((nkpts, nao, nao), dtype=dtype)
    for ao_ks_etc, p0, p1 in mydf.mpi_aoR_loop(mydf.grids, kpts_lst):
        ao_ks = ao_ks_etc[0]
        for k, ao in enumerate(ao_ks):
            vpp[k] += lib.dot(ao.T.conj() * vpplocR[p0:p1], ao)
        ao = ao_ks = None
    vpp = mpi.reduce(lib.asarray(vpp))

    # vppnonloc evaluated in reciprocal space
    # A one-shell Mole whose angular momentum, exponent and coefficient
    # are overwritten for every projector inside vppnl_by_k.
    fakemol = gto.Mole()
    fakemol._atm = numpy.zeros((1, gto.ATM_SLOTS), dtype=numpy.int32)
    fakemol._bas = numpy.zeros((1, gto.BAS_SLOTS), dtype=numpy.int32)
    ptr = gto.PTR_ENV_START
    fakemol._env = numpy.zeros(ptr + 10)
    fakemol._bas[0, gto.NPRIM_OF] = 1
    fakemol._bas[0, gto.NCTR_OF] = 1
    fakemol._bas[0, gto.PTR_EXP] = ptr + 3
    fakemol._bas[0, gto.PTR_COEFF] = ptr + 4

    # buf for SPG_lmi upto l=0..3 and nl=3
    buf = numpy.empty((48, ngrids), dtype=numpy.complex128)

    def vppnl_by_k(kpt):
        # Non-local contribution for one k-point. Uses the shared buf and
        # fakemol from the enclosing scope.
        Gk = Gv + kpt
        G_rad = lib.norm(Gk, axis=1)
        aokG = ft_ao.ft_ao(cell, Gv, kpt=kpt) * (ngrids / cell.vol)
        vppnl = 0
        for ia in range(cell.natm):
            symb = cell.atom_symbol(ia)
            # Atoms without an entry in cell._pseudo contribute nothing.
            if symb not in cell._pseudo:
                continue
            pp = cell._pseudo[symb]
            # p1 is the number of rows of buf filled for this atom so far.
            p1 = 0
            for l, proj in enumerate(pp[5:]):
                rl, nl, hl = proj
                if nl > 0:
                    fakemol._bas[0, gto.ANG_OF] = l
                    fakemol._env[ptr + 3] = .5 * rl**2
                    fakemol._env[ptr + 4] = rl**(l + 1.5) * numpy.pi**1.25
                    pYlm_part = dft.numint.eval_ao(fakemol, Gk, deriv=0)

                    p0, p1 = p1, p1 + nl * (l * 2 + 1)
                    # pYlm is real, SI[ia] is complex
                    # pYlm is a view on rows p0:p1 of buf, not a copy.
                    pYlm = numpy.ndarray((nl, l * 2 + 1, ngrids),
                                         dtype=numpy.complex128,
                                         buffer=buf[p0:p1])
                    for k in range(nl):
                        qkl = pseudo.pp._qli(G_rad * rl, l, k)
                        pYlm[k] = pYlm_part.T * qkl
                    #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm)
                    #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG)
                    #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG)
                    #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp)
            if p1 > 0:
                # All projectors of this atom are handled with one
                # in-place scaling and one lib.zdot call.
                SPG_lmi = buf[:p1]
                SPG_lmi *= SI[ia].conj()
                SPG_lm_aoGs = lib.zdot(SPG_lmi, aokG)
                # Walk the projectors again with the same row offsets.
                p1 = 0
                for l, proj in enumerate(pp[5:]):
                    rl, nl, hl = proj
                    if nl > 0:
                        p0, p1 = p1, p1 + nl * (l * 2 + 1)
                        hl = numpy.asarray(hl)
                        SPG_lm_aoG = SPG_lm_aoGs[p0:p1].reshape(
                            nl, l * 2 + 1, -1)
                        tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG)
                        vppnl += numpy.einsum('imp,imq->pq',
                                              SPG_lm_aoG.conj(), tmp)
        return vppnl * (1. / ngrids**2)

    vppnl = []
    for kpt in mpi.static_partition(kpts_lst):
        vppnl.append(vppnl_by_k(kpt))
    vppnl = mpi.gather(lib.asarray(vppnl))

    if rank == 0:
        for k in range(nkpts):
            # Keep only the real part when the result matrices are real.
            if dtype == numpy.float64:
                vpp[k] += vppnl[k].real
            else:
                vpp[k] += vppnl[k]

    if kpts is None or numpy.shape(kpts) == (3, ):
        vpp = vpp[0]
    return vpp
def _reduce_call(module, name, reg_procs, args, kwargs):
    '''Run mpi._distribute_call and pass its result through mpi.reduce.

    All five arguments are forwarded unchanged to mpi._distribute_call.

    Returns:
        The value returned by mpi.reduce for the call's result.
    '''
    # Imported inside the function, as in the original code (presumably to
    # avoid an import cycle at module load time -- TODO confirm).
    from mpi4pyscf.tools import mpi
    result = mpi._distribute_call(module, name, reg_procs, args, kwargs)
    return mpi.reduce(result)