def get_k(mol_or_mf, dm, hermi=1):
    """Evaluate the K matrix for ``dm`` with the ``_vk_jobs_s8`` job list.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object that is used directly as the mean-field object.
        dm: Density matrix, or the string ``'SKIPPED_ARG'`` when the caller
            did not ship it to this rank.
        hermi: Forwarded unchanged to ``_eval_jk``.

    Returns:
        The result of ``_eval_jk`` reshaped to ``dm.shape``.
    """
    mf = (hf.SCF(mol_or_mf).view(SCF)
          if isinstance(mol_or_mf, gto.mole.Mole)
          else mol_or_mf)

    # dm may be too big for the mpi4py library to serialize. If any rank only
    # holds the placeholder, broadcast dm here.
    dm_was_skipped = isinstance(dm, str) and dm == 'SKIPPED_ARG'
    if any(comm.allgather(dm_was_skipped)):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    packed_state = comm.bcast(mf.pack())
    mf.unpack_(packed_state)
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    # Swap in the K-only prescreening function for the duration of the call.
    opt_contents = mf.opt._this.contents
    vk_prescreen = _vhf._fpointer('CVHFnrs8_vk_prescreen')
    with lib.temporary_env(opt_contents, fprescreen=vk_prescreen):
        vk = _eval_jk(mf, dm, hermi, _vk_jobs_s8)
    return vk.reshape(dm.shape)
def get_k(mol_or_mf=None, dm=None, hermi=1, omega=None):
    """Evaluate the K matrix for ``dm``, optionally with a range-separated
    Coulomb operator.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object that is used directly as the mean-field object.
        dm: Density matrix, or ``mpi.Message.SkippedArg`` when the caller did
            not ship it to this rank.
        hermi: Forwarded unchanged to ``_eval_jk``.
        omega: When not ``None``, the evaluation runs inside
            ``mf.mol.with_range_coulomb(omega)``.

    Returns:
        The result of ``_eval_jk`` reshaped to ``dm.shape``.
    """
    mf = (hf.SCF(mol_or_mf).view(SCF)
          if isinstance(mol_or_mf, gto.mole.Mole)
          else mol_or_mf)

    # dm may be too big for the mpi4py library to serialize. If any rank only
    # holds the skipped-argument marker, broadcast dm here.
    dm_was_skipped = dm is mpi.Message.SkippedArg
    if any(comm.allgather(dm_was_skipped)):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    packed_state = comm.bcast(mf.pack())
    mf.unpack_(packed_state)
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    # Swap in the K-only prescreening function for the duration of the call.
    opt_contents = mf.opt._this.contents
    vk_prescreen = _vhf._fpointer('CVHFnrs8_vk_prescreen')
    with lib.temporary_env(opt_contents, fprescreen=vk_prescreen):
        if omega is not None:
            with mf.mol.with_range_coulomb(omega):
                vk = _eval_jk(mf, dm, hermi, _vk_jobs_s8)
        else:
            vk = _eval_jk(mf, dm, hermi, _vk_jobs_s8)
    return vk.reshape(dm.shape)
def get_jk(mol_or_mf, dm, hermi=1):
    """MPI version of the scf.hf.get_jk function.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object that is used directly as the mean-field object.
        dm: Density matrix, or the string ``'SKIPPED_ARG'`` when the caller
            did not ship it to this rank.
        hermi: Forwarded unchanged to ``_eval_jk``.

    Returns:
        ``(vj, vk)``, both reshaped to ``dm.shape``.
    """
    mf = (hf.SCF(mol_or_mf).view(SCF)
          if isinstance(mol_or_mf, gto.mole.Mole)
          else mol_or_mf)

    # dm may be too big for the mpi4py library to serialize. If any rank only
    # holds the placeholder, broadcast dm here.
    dm_was_skipped = isinstance(dm, str) and dm == 'SKIPPED_ARG'
    if any(comm.allgather(dm_was_skipped)):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    packed_state = comm.bcast(mf.pack())
    mf.unpack_(packed_state)
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)

    # Only rank 0 post-processes J: each matrix in the stack is passed to
    # lib.hermi_triu in place.
    if rank == 0:
        for vj_block in vj:
            lib.hermi_triu(vj_block, 1, inplace=True)

    target_shape = dm.shape
    return vj.reshape(target_shape), vk.reshape(target_shape)
def __init__(self, **kw):
    """Construct a self-consistent field object.

    Recognised keywords (all others are forwarded to ``tddft_iter.__init__``):
        perform_scf: If truthy, ``kernel_scf(**kw)`` is run at the end of
            construction. Defaults to ``False``.
        kmat_algo: Stored on the instance; defaults to ``None``.
        kmat_timing: Only its presence matters: the attribute starts at
            ``0.0`` when the key is given and is ``None`` otherwise.

    ``xc_code``, ``dealloc_hsx`` and ``dtype`` supplied by the caller are
    discarded; the base class is always initialised with ``np.float64``,
    ``'RPA'`` and ``False`` respectively.
    """
    self.perform_scf = kw.get('perform_scf', False)
    self.kmat_algo = kw.get('kmat_algo', None)
    self.kmat_timing = None if 'kmat_timing' not in kw else 0.0

    # These three are fixed below, so drop any caller-supplied values to
    # avoid passing the same keyword twice.
    for overridden_key in ('xc_code', 'dealloc_hsx', 'dtype'):
        kw.pop(overridden_key, None)
    tddft_iter.__init__(self, dtype=np.float64, xc_code='RPA',
                        dealloc_hsx=False, **kw)

    # Keep the xc code the base class ended up with as the kernel xc code,
    # then switch the active one to the mean-field xc code.
    self.xc_code_kernel = copy(self.xc_code)
    self.xc_code = self.xc_code_mf
    # necessary to get_hcore(...) in case of pp starting point
    self.dm_mf = self.make_rdm1()

    if self.gen_pb:
        self.hkernel_den = pack2den_u(self.kernel)

    scf_factory = hf.SCF if self.nspin == 1 else uhf.UHF
    self.pyscf_scf = scf_factory(self)

    # overriding the attributes from hf.SCF ...
    self.pyscf_scf.direct_scf = False
    for method_name in ('get_hcore', 'get_ovlp', 'get_j', 'get_jk',
                        'energy_nuc'):
        setattr(self.pyscf_scf, method_name, getattr(self, method_name))

    if self.perform_scf:
        self.kernel_scf(**kw)
def get_jk(mol_or_mf, dm, hermi, dmcur, *args, **kwargs):
    """MPI J/K build: J through density fitting, K on a real-space grid.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object used directly as the mean-field object.
        dm: Density matrix (or stack); reshaped to ``(-1, nao, nao)``. May be
            the string ``'SKIPPED_ARG'``, in which case it is broadcast.
        hermi: Accepted but not read anywhere in this body.
        dmcur: Replaces ``dm`` as the working density on the single call in
            which the grid is switched from level ``grdlvl_i`` to ``grdlvl_f``.
        *args, **kwargs: Accepted and ignored.

    Returns:
        ``(vj, vk, grdchg)``. On rank 0, ``vj`` and ``vk`` are arrays reshaped
        to the shape of ``dm``; on other ranks they stay lists holding the
        return values of ``comm.reduce``. ``grdchg`` is 1 on the call that
        switched grids, otherwise 0.

    Notes:
        State is kept in module-level globals between calls (``cond``, the
        grid arrays, ``int2c``, ``ovlp``, ``ao_loc``, ``rao_loc``). ``cond``
        acts as a small state machine: 0 = first call (build the initial
        grid, then 2); 2 = initial grid in use; 1 = switch requested (build
        the final grid, then 3); 3 = final grid in use.
        ``loop``, ``paux`` and ``get_gridss`` are defined outside this block.
    """
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        dm = mpi.bcast_tagged_array_occdf(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))

    # initial and final grids level
    grdlvl_i = 0
    grdlvl_f = 1
    # norm_ddm threshold for grids change
    thrd_nddm = 0.2
    # set block size to adapt memory
    sblk = 200
    # interspace between v shell
    intsp = 1
    # threshold for u and v
    gthrdu = 1e-10
    gthrdvs = 1e-10
    gthrdvd = 1e-10

    global cond, wao_vx, ngridsx, coordsx, gridatm

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    # DF-J and sgX set
    mf.with_df = mf
    mol = mf.mol
    global int2c, ovlp, ao_loc, rao_loc
    # use mf.opt to calc int2c once, cond, dm0, and rao, ao_loc, ovlp for sgX
    # NOTE(review): nesting reconstructed from flattened source; everything
    # down to the parameter print is assumed to be part of this one-time setup.
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()
        cond = 0
        # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py
        # mf.auxbasis = 'weigend'
        auxbasis = mf.auxbasis
        auxbasis = comm.bcast(auxbasis)
        mf.auxbasis = comm.bcast(mf.auxbasis)
        auxmol = df.addons.make_auxmol(mol, auxbasis)
        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1', comp=1)
        if rank == 0:
            print('auxmol.basis',auxmol.basis,'number of aux basis',int2c.shape[0])
        # for sgX
        # ao_loc and rao_loc
        intbn = mol._add_suffix('int3c2e')
        intbn = gto.moleintor.ascint3(intbn)
        ao_loc = gto.moleintor.make_loc(mol._bas, intbn)
        # rao_loc is the reverse map of ao_loc: AO index -> shell index.
        rao_loc = numpy.zeros((nao),dtype=int)
        for i in range(mol.nbas):
            for j in range(ao_loc[i],ao_loc[i+1]):
                rao_loc[j] = i
        ovlp = mol.intor_symmetric('int1e_ovlp')
        if rank == 0:
            print('thrd_nddm',thrd_nddm, 'sblk',sblk, 'intsp',intsp, 'gthrdu',gthrdu)

    # coarse and fine grids change
    grdchg = 0
    norm_ddm = 0
    for k in range(nset):
        norm_ddm += numpy.linalg.norm(dms[k])
    # The norm of the incoming matrices themselves is tested, so presumably
    # the caller passes a density *difference* here -- TODO confirm.
    if norm_ddm < thrd_nddm and cond == 2 :
        cond = 1
    if cond == 0:
        wao_vx, ngridsx, coordsx, gridatm = get_gridss(mol,grdlvl_i, sblk)
        if rank == 0:
            print('grids level at first is', grdlvl_i)
        cond = 2
    elif cond == 1:
        wao_vx, ngridsx, coordsx, gridatm = get_gridss(mol,grdlvl_f, sblk)
        if rank == 0:
            print('grids level change to', grdlvl_f)
        # On the grid switch the working density is taken from dmcur instead
        # of dm; nset is not recomputed.
        dms = numpy.asarray(dmcur)
        dms = dms.reshape(-1,nao,nao)
        grdchg = 1
        cond = 3

    # DF-J
    # Pack dm + dm.T into lower-triangular storage and halve the diagonal so
    # that the packed vector represents dm itself on the diagonal.
    dmtril = []
    for k in range(nset):
        dmtril.append(lib.pack_tril(dms[k]+dms[k].T))
        i = numpy.arange(nao)
        dmtril[k][i*(i+1)//2+i] *= .5
    # First pass over the 3-index blocks: contract with the packed density,
    # filling this rank's slice (length paux[rank]) block by block.
    rho = []
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        #if rank==0: print('slice-naux',naux,'rank',rank)
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            if b0 == 0:
                rho.append(numpy.empty(paux[rank]))
            rho[k][b0:b1] = numpy.dot(eri1, dmtril[k])
        b0 = b1
    # Gather the per-rank slices, solve against the 2-index matrix on rank 0,
    # then scatter the solution back in chunks of size paux.
    orho = []
    rec = []
    for k in range(nset):
        orho.append(mpi.gather(rho[k]))
        if rank == 0:
            ivj0 = scipy.linalg.solve(int2c, orho[k])
        else:
            ivj0 = None
        rec.append(numpy.empty(paux[rank]))
        comm.Scatterv([ivj0,paux],rec[k],root=0)
    # Second pass: contract the fitted coefficients with the same blocks to
    # accumulate the packed J contribution of this rank.
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            vj[k] += numpy.dot(rec[k][b0:b1].T, eri1)
        b0 = b1
    for k in range(nset):
        vj[k] = comm.reduce(vj[k])

    # sgX
    wao_v = wao_vx
    coords = coordsx
    for k in range(nset):
        # Kuv = Sum(Xug Avt Dkt Xkg)
        ngrids = coords.shape[0]
        # gridatm holds the boundaries of consecutive grid-point blocks.
        for ii in range(gridatm.shape[0]-1):
            i0 = gridatm[ii]
            i1 = gridatm[ii+1]
            # screening u by value of grids
            umaxg = numpy.amax(numpy.absolute(wao_v[i0:i1]), axis=0)
            usi = numpy.argwhere(umaxg > gthrdu).reshape(-1)
            # screening v by dm and ovlp then triangle matrix bn
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > gthrdvs).reshape(-1)
            udms = dms[k][usi, :]
            dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
            dsi = numpy.argwhere(dmaxg > gthrdvd).reshape(-1)
            # Keep only v that survive both the overlap and density screens.
            vsi = numpy.intersect1d(dsi, osi)
            if len(vsi) != 0:
                vsh = numpy.unique(rao_loc[vsi])
                #vshbeg = vsh[0]
                vshfin = vsh[-1]+1
                # use gap between continuous v to save time:
                # vshd marks the last position of each run of shells whose
                # indices are no more than intsp apart.
                vsh1 = vsh
                vsh1= numpy.delete(vsh1, 0)
                vsh1 = numpy.append(vsh1, [vshfin])
                vshd = numpy.argwhere(vsh1-vsh > intsp)
                vshd = numpy.append(vshd, vsh.shape[0]-1)
                nvshd = vshd.shape[0]
                #vbeg = ao_loc[vshbeg]
                vfin = ao_loc[vshfin]
                # Grid points of this block are treated as point charges and
                # appended to the molecule as a third integral index.
                fakemol = gto.fakemol_for_charges(coords[i0:i1])
                pmol = gto.mole.conc_mol(mol, fakemol)
                bn = []
                dmsk = []
                # bntp caches the upper-triangle (i <= j) run-pair integral
                # blocks so the lower triangle can reuse them transposed.
                bntp = [[0 for col in range(nvshd)] for row in range(nvshd)]
                for i in range(nvshd):
                    if i==0:
                        ii0 = vsh[0]
                        ii1 = vsh[vshd[0]]+1
                    else:
                        ii0 = vsh[vshd[i-1]+1]
                        ii1 = vsh[vshd[i]]+1
                    dmsk.append(dms[k][:,ao_loc[ii0]:ao_loc[ii1]])
                    bnh = []
                    for j in range(0, i):
                        bnh.append(bntp[j][i].swapaxes(0,1))
                    for j in range(i, nvshd):
                        if j==0:
                            jj0 = vsh[0]
                            jj1 = vsh[vshd[0]]+1
                        else:
                            jj0 = vsh[vshd[j-1]+1]
                            jj1 = vsh[vshd[j]]+1
                        shls_slice = (ii0, ii1, jj0, jj1, mol.nbas, mol.nbas+fakemol.nbas)
                        bntp[i][j] = pmol.intor(intor='int3c2e', comp=1, aosym='s1', shls_slice=shls_slice)
                        bnh.append(bntp[i][j])
                    bnrow = numpy.concatenate(bnh, axis=1)
                    bn.append(bnrow)
                bn = numpy.concatenate(bn, axis=0)
                # NOTE(review): abn, vfin and ngrids are computed but not used
                # later in this function.
                abn = numpy.absolute(bn)
                dmsk = numpy.asarray(numpy.hstack(dmsk))
                fg = numpy.dot(wao_v[i0:i1,usi],dmsk[usi])
                gv = lib.einsum('vtg,gt->gv', bn, fg)
                vk0 = numpy.zeros((nao,nao))
                vksp = lib.einsum('gu,gv->uv', wao_v[i0:i1,usi], gv)
                # Scatter the compact (u, selected-v) result back into the
                # full nao x nao matrix, one shell run at a time.
                blen = 0
                for i in range(nvshd):
                    if i==0:
                        ii0 = vsh[0]
                        ii1 = vsh[vshd[0]]+1
                    else:
                        ii0 = vsh[vshd[i-1]+1]
                        ii1 = vsh[vshd[i]]+1
                    baa = ao_loc[ii1]-ao_loc[ii0]
                    vk0[usi,ao_loc[ii0]:ao_loc[ii1]] = vksp[:,blen:(blen+baa)]
                    blen += baa
                vk[k] += vk0
            else:
                vk0 = numpy.zeros((nao,nao))
                vk[k] += vk0
        # Grid overlap of the basis values, summed over ranks together with K.
        sn = lib.einsum('gu,gv->uv', wao_v, wao_v)
        vk[k] = comm.reduce(vk[k])
        sn = comm.reduce(sn)
        # S Sn^-1 for grids to analytic
        if rank == 0:
            snsgk = scipy.linalg.solve(sn, vk[k])
            vk[k] = numpy.matmul(ovlp, snsgk)

    if rank == 0:
        vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape)
        vk = numpy.asarray(vk).reshape(dm_shape)
    #if cond==3: cond=4
    return vj, vk, grdchg
def get_jk(mol_or_mf, dm, hermi=1):
    """MPI density-fitted J/K build with an occupied-orbital K path.

    Two code paths are selected by whether ``dm`` carries a ``mo_coeff``
    attribute:

    * without ``mo_coeff``: the auxiliary 2-index matrix ``int2c`` is
      (re)computed, J is built by density fitting and K is returned as zeros;
    * with ``mo_coeff``: J and K are both built from the occupied orbitals,
      reusing the globals ``int2c`` and ``eri1`` left behind by an earlier
      call (this function's other branch rebinds ``eri1`` in its loop).

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object used directly as the mean-field object.
        dm: Density matrix (or stack), optionally tagged with ``mo_coeff`` and
            ``mo_occ``; may be the string ``'SKIPPED_ARG'``.
        hermi: Accepted but not read anywhere in this body.

    Returns:
        ``(vj, vk)``. On rank 0 both are reshaped to the shape of ``dm``; on
        other ranks they hold whatever ``comm.reduce`` returned there.

    Notes:
        ``iokr`` and ``rec`` are published as module globals ("for grad").
        ``loop``, ``paux`` and ``pauxz`` are defined outside this block.
    """
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    global eri1
    # Both contributions are hard-wired on in this variant.
    with_j=True
    with_k=True
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        dm = mpi.bcast_tagged_array_occdf(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    mf.with_df = mf
    mol = mf.mol
    global int2c
    # int2c is only (re)computed when dm has no mo_coeff attribute; the
    # mo_coeff path below relies on the value stored by such a call.
    if not hasattr(dm, 'mo_coeff'):
        # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py
        # mf.auxbasis = 'weigend'
        auxbasis = mf.auxbasis
        auxbasis = comm.bcast(auxbasis)
        mf.auxbasis = comm.bcast(mf.auxbasis)
        auxmol = df.addons.make_auxmol(mol, auxbasis)
        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1', comp=1)
        # NOTE(review): naux0 is not used afterwards.
        naux0 = int2c.shape[0]

    # C routines handed to the half-transformation driver below.
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2
    null = lib.c_null_ptr()

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    if hasattr(dm, 'mo_coeff'):
        #TODO: test whether dm.mo_coeff matching dm
        mo_coeff = numpy.asarray(dm.mo_coeff, order='F')
        mo_occ = numpy.asarray(dm.mo_occ)
        nmo = mo_occ.shape[-1]
        mo_coeff = mo_coeff.reshape(-1,nao,nmo)
        mo_occ = mo_occ.reshape(-1,nmo)
        if mo_occ.shape[0] * 2 == nset: # handle ROHF DM
            # Split the occupations into an alpha-like (occ > 0) and a
            # beta-like (occ == 2) set sharing the same coefficients.
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ> 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ==2, dtype=numpy.double)
            assert(mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))

        dmtril = []
        orbo = []
        orbo0 = []
        kiv = []
        for k in range(nset):
            # orbo: occupied orbitals scaled by sqrt(occupation);
            # orbo0: the same columns unscaled.
            c = numpy.einsum('pi,i->pi', mo_coeff[k][:,mo_occ[k]>0], numpy.sqrt(mo_occ[k][mo_occ[k]>0]))
            orbo.append(numpy.asarray(c, order='F'))
            orbo0.append(numpy.asarray(mo_coeff[k][:,mo_occ[k]>0], order='F'))
            nocc = orbo[k].shape[1]
            kiv.append(numpy.zeros((nocc,nao)))
        rho = []
        split = []
        buf1 = []
        # for eri1 in loop(mf.with_df):
        # The loop was replaced by a single pass over the global eri1 block.
        if 1==1:
            naux, nao_pair = eri1.shape
            assert(nao_pair == nao*(nao+1)//2)
            for k in range(nset):
                nocc = orbo[k].shape[1]
                if nocc > 0:
                    # NOTE(review): buf1/split are indexed by k but only
                    # appended when nocc > 0; this assumes no set is empty.
                    buf1.append(numpy.empty((naux*nocc,nao)))
                    fdrv(ftrans, fmmm, buf1[k].ctypes.data_as(ctypes.c_void_p), eri1.ctypes.data_as(ctypes.c_void_p), orbo[k].ctypes.data_as(ctypes.c_void_p), ctypes.c_int(naux), ctypes.c_int(nao), (ctypes.c_int*4)(0, nocc, 0, nao), null, ctypes.c_int(0))
                    buf2r = lib.dot(buf1[k], orbo0[k]).reshape(naux,nocc,-1)
                    buf2r = lib.pack_tril(buf2r)
                    # gather buf2r
                    split.append(numpy.array_split(buf2r,mpi.pool.size,axis = 1))
        # global iokr, rec for grad
        global iokr, rec
        iokr = []
        rec = []
        for k in range(nset):
            # Redistribute: column chunk i from every rank is gathered on
            # rank i.
            for i in range(mpi.pool.size):
                obuf2r0 = mpi.gather(split[k][i],root=i)
                if rank == i:
                    obuf2r = obuf2r0
            # Drop references to buffers that are no longer needed.
            obuf2r0 = None
            split[k] = None
            # k
            iok = scipy.linalg.solve(int2c, obuf2r)
            # Redistribute back: rows pauxz[i-1]:pauxz[i] are gathered on
            # rank i.
            for i in range(mpi.pool.size):
                if i == 0:
                    j0 = 0
                else:
                    j0 = pauxz[i-1]
                j1 = pauxz[i]
                iok0 = mpi.gather(iok[j0:j1].reshape(-1,order='F'),root=i)
                if rank == i:
                    iokx = lib.unpack_tril(iok0.reshape((naux,-1),order='F'))
            iok0 = None
            iok = None
            nocc = orbo[k].shape[1]
            # j
            rec.append(numpy.einsum('kii->ki', iokx.reshape(naux,nocc,-1)).dot(numpy.sqrt(mo_occ[k][mo_occ[k]>0])))
            iokr.append(iokx.reshape(naux*nocc,-1))
            iokx = None
        if 1==1:
            naux, nao_pair = eri1.shape
            assert(nao_pair == nao*(nao+1)//2)
            for k in range(nset):
                nocc = orbo[k].shape[1]
                if nocc > 0:
                    kiv[k] += lib.dot(iokr[k].T, buf1[k])
                if with_j:
                    vj[k] += numpy.dot(rec[k].T, eri1)
                    # vj[k] += numpy.tensordot(rec[k], eri1, axes=([0],[0]))
        for k in range(nset):
            kiv[k] = comm.reduce(kiv[k])
            # project iv -> uv
            if rank == 0:
                kij = lib.einsum('ui,ju->ij', orbo0[k], kiv[k])
                kr = scipy.linalg.solve(kij, kiv[k])
                vk[k] = lib.dot(kiv[k].T,kr)
            else:
                vk[k] = None
            vj[k] = comm.reduce(vj[k])
    else:
        # Pack dm + dm.T into lower-triangular storage and halve the diagonal.
        dmtril = []
        for k in range(nset):
            if with_j:
                dmtril.append(lib.pack_tril(dms[k]+dms[k].T))
                i = numpy.arange(nao)
                dmtril[k][i*(i+1)//2+i] *= .5
        rho = []
        # This loop rebinds the global eri1; the last block stays available
        # to later calls.
        # NOTE(review): rho is appended once per (block, k), yet indexed by k
        # below -- appears to assume loop() yields a single block per rank.
        for eri1 in loop(mf.with_df):
            naux, nao_pair = eri1.shape
            #print('slice-naux',naux,'rank',rank,auxmol.basis)
            assert(nao_pair == nao*(nao+1)//2)
            for k in range(nset):
                if with_j:
                    rho.append(numpy.dot(eri1, dmtril[k]))
        # Gather, solve against int2c on rank 0, scatter back in paux chunks.
        orho = []
        rec = []
        for k in range(nset):
            orho.append(mpi.gather(rho[k]))
            if rank == 0:
                ivj0 = scipy.linalg.solve(int2c, orho[k])
            else:
                ivj0 = None
            rec.append(numpy.empty(naux))
            comm.Scatterv([ivj0,paux],rec[k],root=0)
        if 1==1:
            naux, nao_pair = eri1.shape
            assert(nao_pair == nao*(nao+1)//2)
            for k in range(nset):
                if with_j:
                    vj[k] += numpy.dot(rec[k].T, eri1)
        for k in range(nset):
            vj[k] = comm.reduce(vj[k])
        # K is not evaluated on this path; zeros are returned.
        vk = numpy.zeros(dm_shape)

    if rank == 0:
        if with_j:
            vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape)
        if with_k:
            vk = numpy.asarray(vk).reshape(dm_shape)
    return vj, vk
def get_jkgrd(mol_or_mf, dm, mo_coeff=None, mo_occ=None):
    """Contract derivative DF integrals with fitted coefficients under MPI.

    Combines first-derivative 3-index (``int3c2e_ip1``, ``int3c2e_ip2``) and
    2-index (``int2c2e_ip1``) integrals with the fitting coefficients that
    ``mpi_occ_df_incore`` exposes as ``iokr`` and ``rec``. Presumably these
    are the density-fitting J/K contributions to the nuclear gradient --
    TODO confirm against the caller.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object used directly as the mean-field object.
        dm: Density matrix; may be the string ``'SKIPPED_ARG'``. It is used
            both flattened against ``nao*nao`` columns and with the 2-index
            einsum label ``uv``, so a single ``(nao, nao)`` matrix is assumed.
        mo_coeff, mo_occ: Orbital coefficients and occupations. Despite the
            ``None`` defaults both are dereferenced unconditionally.

    Returns:
        ``(eaux1p, eaux1u)``. On rank 0 these are the assembled combinations
        of the Coulomb-like (``ec1_*``) and exchange-like (``ex1_*``) terms;
        on other ranks they are uninitialised ``numpy.empty((3, naux))``
        placeholders.

    Notes:
        After the per-set loops, ``k`` and ``nocc`` keep the values of the
        last iteration, so the contractions use only that set.
        ``loop_aux``, ``pauxz`` and ``mpi_occ_df_incore`` are defined outside
        this block.
    """
    # Debug trace of the current memory usage on rank 0.
    if rank == 0:
        print('uuuu00', lib.current_memory()[0])
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        dm = mpi.bcast_tagged_array_occdf(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    mf.with_df = mf
    mol = mf.mol
    auxbasis = mf.auxbasis
    auxbasis = comm.bcast(auxbasis)
    mf.auxbasis = comm.bcast(mf.auxbasis)
    auxmol = df.addons.make_auxmol(mol, auxbasis)
    nao = mol.nao_nr()
    naux = auxmol.nao_nr()
    if rank == 0:
        print('number of AOs', nao)
        print('number of auxiliary basis functions', naux)

    # (d/dX i,j|P)
    # int3c_e1 = df.incore.aux_e2(mol, auxmol, intor='int3c2e_ip1', aosym='s1', comp=3)
    int3c_e1 = loop_aux(mf, intor='int3c2e_ip1', aosym='s1', comp=3)
    # (i,j|d/dX P)
    # int3c_e2 = df.incore.aux_e2(mol, auxmol, intor='int3c2e_ip2', aosym='s1', comp=3)
    int3c_e2 = loop_aux(mf, intor='int3c2e_ip2', aosym='s1', comp=3)
    # (d/dX P|Q)
    int2c_e1 = auxmol.intor('int2c2e_ip1', aosym='s1', comp=3)

    # NOTE(review): fmmm, fdrv, ftrans, null, vj and vk are set up here but
    # not used later in this function.
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2
    null = lib.c_null_ptr()

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    # Always-true guard; nesting below reconstructed from flattened source.
    if 0 == 0:
        nmo = mo_occ.shape[-1]
        mo_coeff = mo_coeff.reshape(-1, nao, nmo)
        mo_occ = mo_occ.reshape(-1, nmo)
        if mo_occ.shape[0] * 2 == nset:  # handle ROHF DM
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
            assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))

        dmtril = []
        orbo = []
        orbo0 = []
        kiv = []
        for k in range(nset):
            # orbo: occupied orbitals scaled by sqrt(occupation);
            # orbo0: the same columns unscaled.
            c = numpy.einsum('pi,i->pi', mo_coeff[k][:, mo_occ[k] > 0], numpy.sqrt(mo_occ[k][mo_occ[k] > 0]))
            orbo.append(numpy.asarray(c, order='F'))
            orbo0.append(
                numpy.asarray(mo_coeff[k][:, mo_occ[k] > 0], order='F'))
            nocc = orbo[k].shape[1]
            kiv.append(numpy.zeros((nocc, nao)))
        rho = []
        split = []
        buf1 = []
        # Fitting coefficients stored by the energy-stage J/K module.
        iokr = mpi_occ_df_incore.iokr
        rec = mpi_occ_df_incore.rec

        k = 0
        dmtril = []
        for k in range(nset):
            dmtril.append(lib.pack_tril(dms[k] + dms[k].T))
            i = numpy.arange(nao)
            dmtril[k][i * (i + 1) // 2 + i] *= .5

        # --- Coulomb-like terms (prefix ec1_) ---
        # 3-index, derivative on the auxiliary function, gathered over ranks.
        tmp0 = int3c_e2.swapaxes(1, 3).reshape(-1, nao * nao)
        tmp = numpy.dot(tmp0, dm.reshape(-1))
        tmp0 = numpy.einsum('xp,p->xp', tmp.reshape(3, -1), rec[k])
        ec1_3cp = mpi.gather(tmp0.reshape(-1, order='F')).reshape((3, -1), order='F')
        # 3-index, derivative on the first AO, summed over ranks.
        ec1_3cu_b = numpy.einsum('xuvp,p,uv->xu', int3c_e1, rec[k], dm)
        ec1_3cu = comm.reduce(ec1_3cu_b)
        # [j0, j1) is this rank's slice of the auxiliary index.
        if rank == 0:
            j0 = 0
        else:
            j0 = pauxz[rank - 1]
        j1 = pauxz[rank]
        # 2-index term: partial products are all-reduced, then this rank's
        # slice is weighted by rec and gathered.
        tmp0 = int2c_e1[:, :, j0:j1].dot(rec[k])
        tmp = comm.allreduce(tmp0)
        tmp0 = numpy.einsum('xp,p->xp', tmp[:, j0:j1], rec[k])
        ec1_2c = mpi.gather(tmp0.reshape(-1, order='F')).reshape((3, -1), order='F')

        # --- Exchange-like terms (prefix ex1_) ---
        # Back-transform both orbital indices of the stored coefficients to
        # the AO basis.
        coeff3mo = numpy.sqrt(2.0e0) * (iokr[k].reshape(-1, nocc, nocc))
        tmp = numpy.tensordot(coeff3mo, orbo[k], axes=([2], [1]))
        coeffb = numpy.tensordot(tmp, orbo[k], axes=([1], [1]))
        ex1_3cp_b = numpy.einsum('puv,xuvp->xp', coeffb, int3c_e2)
        ex1_3cp = mpi.gather(ex1_3cp_b.reshape(-1, order='F')).reshape(
            (3, -1), order='F')
        ex1_3cu_b = numpy.einsum('puv,xuvp->xu', coeffb, int3c_e1)
        ex1_3cu = comm.reduce(ex1_3cu_b)
        tmp0 = mpi.allgather(coeff3mo)
        tmp = numpy.tensordot(tmp0, coeff3mo, axes=([1, 2], [1, 2]))
        ex1_2c_b = numpy.einsum('xpq,pq->xp', int2c_e1[:, :, j0:j1], tmp)
        ex1_2c = comm.reduce(ex1_2c_b)

    # Placeholders returned unchanged on non-root ranks.
    # NOTE(review): eaux1u is pre-sized with naux, while the rank-0 value is
    # built from arrays labelled 'xu' (AO index) -- shapes may differ.
    eaux1p = numpy.empty((3, naux))
    eaux1u = numpy.empty((3, naux))
    if rank == 0:
        eaux1p = -0.5 * (2 * ec1_3cp - 2 * ec1_2c - 0.5 * (2 * ex1_3cp - 2 * ex1_2c))
        eaux1u = -0.5 * (4 * ec1_3cu - 0.5 * (4 * ex1_3cu))
    return eaux1p, eaux1u
def get_jk(mol_or_mf, dm, hermi=1): '''MPI version of scf.hf.get_jk function''' #vj = get_j(mol_or_mf, dm, hermi) #vk = get_k(mol_or_mf, dm, hermi) if isinstance(mol_or_mf, gto.mole.Mole): mf = hf.SCF(mol_or_mf).view(SCF) else: mf = mol_or_mf # dm may be too big for mpi4py library to serialize. Broadcast dm here. if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')): dm = mpi.bcast_tagged_array_occdf(dm) mf.unpack_(comm.bcast(mf.pack())) # initial and final grids level grdlvl_i = 0 grdlvl_f = 1 # norm_ddm threshold for grids change thrd_nddm = 0.03 # set block size to adapt memory sblk = 200 global cond, wao_vx, ngridsx, coordsx, gthrd, dm0 dms = numpy.asarray(dm) dm_shape = dms.shape nao = dm_shape[-1] dms = dms.reshape(-1, nao, nao) nset = dms.shape[0] vj = [0] * nset vk = [0] * nset # DF-J set mf.with_df = mf mol = mf.mol global int2c # use mf.opt to calc int2c once, cond, dm0 if mf.opt is None: mf.opt = mf.init_direct_scf() cond = 0 dm0 = numpy.zeros((nset, nao, nao)) # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py # mf.auxbasis = 'weigend' auxbasis = mf.auxbasis auxbasis = comm.bcast(auxbasis) mf.auxbasis = comm.bcast(mf.auxbasis) auxmol = df.addons.make_auxmol(mol, auxbasis) # (P|Q) int2c = auxmol.intor('int2c2e', aosym='s1', comp=1) if rank == 0: print('auxmol.basis', auxmol.basis) # coase and fine grids change norm_ddm = 0 for k in range(nset): norm_ddm += numpy.linalg.norm(dms[k] - dm0[k]) dm0 = dms if norm_ddm < thrd_nddm and cond == 2: cond = 1 if cond == 0: wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_i) if rank == 0: print('grids level at first is', grdlvl_i) cond = 2 elif cond == 1: wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_f) if rank == 0: print('grids level change to', grdlvl_f) cond = 3 # DF-J dmtril = [] for k in range(nset): dmtril.append(lib.pack_tril(dms[k] + dms[k].T)) i = numpy.arange(nao) dmtril[k][i * (i + 1) // 2 + i] *= .5 rho = [] b0 = 0 for eri1 in loop(mf.with_df): 
naux, nao_pair = eri1.shape # if rank==0: print('slice-naux',naux,'rank',rank) b1 = b0 + naux assert (nao_pair == nao * (nao + 1) // 2) for k in range(nset): if b0 == 0: rho.append(numpy.empty(paux[rank])) rho[k][b0:b1] = numpy.dot(eri1, dmtril[k]) b0 = b1 orho = [] rec = [] for k in range(nset): orho.append(mpi.gather(rho[k])) if rank == 0: ivj0 = scipy.linalg.solve(int2c, orho[k]) else: ivj0 = None rec.append(numpy.empty(paux[rank])) comm.Scatterv([ivj0, paux], rec[k], root=0) b0 = 0 for eri1 in loop(mf.with_df): naux, nao_pair = eri1.shape b1 = b0 + naux assert (nao_pair == nao * (nao + 1) // 2) for k in range(nset): vj[k] += numpy.dot(rec[k][b0:b1].T, eri1) b0 = b1 for k in range(nset): vj[k] = comm.reduce(vj[k]) # sgX for k in range(nset): # screening from Fg fg = numpy.dot(wao_vx, dms[k]) sngds = [] ss = 0 for i in range(ngridsx): if numpy.amax(numpy.absolute(fg[i, :])) < gthrd: sngds.append(i) ss += 1 if ss < ngridsx: wao_v = numpy.delete(wao_vx, sngds, 0) fg = numpy.delete(fg, sngds, 0) coords = numpy.delete(coordsx, sngds, 0) else: wao_v = wao_vx coords = coordsx # Kuv = Sum(Xug Avt Dkt Xkg) ngrids = coords.shape[0] blksize = min(ngrids, sblk) for i0, i1 in lib.prange(0, ngrids, blksize): bn = batch_nuc(mol, coords[i0:i1]) gbn = bn.swapaxes(0, 2) gv = lib.einsum('gvt,gt->gv', gbn, fg[i0:i1]) vk[k] += lib.einsum('gu,gv->uv', wao_v[i0:i1], gv) sn = lib.einsum('gu,gv->uv', wao_v, wao_v) vk[k] = comm.reduce(vk[k]) sn = comm.reduce(sn) # SSn^-1 for grids to analitic if rank == 0: snsgk = scipy.linalg.solve(sn, vk[k]) ovlp = mol.intor_symmetric('int1e_ovlp') vk[k] = numpy.matmul(ovlp, snsgk) if rank == 0: vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape) vk = numpy.asarray(vk).reshape(dm_shape) return vj, vk
def get_jk(mol_or_mf, dm, hermi=1):
    """MPI J/K build in which both J and K are evaluated on a real-space grid.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object used directly as the mean-field object.
        dm: Density matrix (or stack); reshaped to ``(-1, nao, nao)``. May be
            the string ``'SKIPPED_ARG'``, in which case it is broadcast.
        hermi: Accepted but not read anywhere in this body.

    Returns:
        ``(vj, vk)``. On rank 0 both are arrays reshaped to the shape of
        ``dm``; on other ranks they stay lists holding the return values of
        ``comm.reduce``.

    Notes:
        State is kept in module-level globals between calls (``cond``, the
        grid arrays, ``gthrd``, ``dm0``). Both grid levels are set to 1 here,
        so the "switch" rebuilds a grid of the same level.
        ``get_gridss`` and ``batch_nuc`` are defined outside this block.
    """
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        dm = mpi.bcast_tagged_array_occdf(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))

    # initial and final grids level
    grdlvl_i = 1
    grdlvl_f = 1
    # norm_ddm threshold for grids change
    thrd_nddm = 0.03
    # set block size to adapt memory
    sblk = 200

    global cond, wao_vx, ngridsx, coordsx, gthrd, dm0

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    mol = mf.mol
    # First call only: initialise the grid state machine and the reference
    # density used for the convergence test below.
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()
        cond = 0
        dm0 = numpy.zeros((nset, nao, nao))

    # coarse and fine grids change
    norm_ddm = 0
    for k in range(nset):
        norm_ddm += numpy.linalg.norm(dms[k] - dm0[k])
    dm0 = dms
    if norm_ddm < thrd_nddm and cond == 2:
        cond = 1
    if cond == 0:
        wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_i)
        if rank == 0:
            print('grids level at first is', grdlvl_i)
        cond = 2
    elif cond == 1:
        wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_f)
        if rank == 0:
            print('grids level change to', grdlvl_f)
        cond = 3

    for k in range(nset):
        # screening from Fg: collect grid rows whose largest |Fg| entry is
        # below gthrd.
        fg = numpy.dot(wao_vx, dms[k])
        sngds = []
        ss = 0
        for i in range(ngridsx):
            if numpy.amax(numpy.absolute(fg[i, :])) < gthrd:
                sngds.append(i)
                ss += 1
        # Drop the screened rows, unless every row was screened, in which
        # case the full grid is kept.
        if ss < ngridsx:
            wao_v = numpy.delete(wao_vx, sngds, 0)
            fg = numpy.delete(fg, sngds, 0)
            coords = numpy.delete(coordsx, sngds, 0)
        else:
            wao_v = wao_vx
            coords = coordsx
        # Kuv = Sum(Xug Avt Dkt Xkg)
        ngrids = coords.shape[0]
        blksize = min(ngrids, sblk)
        for i0, i1 in lib.prange(0, ngrids, blksize):
            bn = batch_nuc(mol, coords[i0:i1])
            # Move the grid index to the front for the contractions below.
            gbn = bn.swapaxes(0, 2)
            # J: per-grid-point scalar from the full density, then weight the
            # basis values with it.
            jg = numpy.dot(gbn.reshape(-1, nao * nao), dms[k].reshape(-1))
            xj = lib.einsum('gv,g->gv', wao_v[i0:i1], jg)
            vj[k] += lib.einsum('gu,gv->uv', wao_v[i0:i1], xj)
            # K: contract the integrals with Fg on the same block.
            gv = lib.einsum('gvt,gt->gv', gbn, fg[i0:i1])
            vk[k] += lib.einsum('gu,gv->uv', wao_v[i0:i1], gv)
        # Grid overlap of the basis values, summed over ranks with J and K.
        sn = lib.einsum('gu,gv->uv', wao_v, wao_v)
        # comm.Barrier()
        vj[k] = comm.reduce(vj[k])
        vk[k] = comm.reduce(vk[k])
        sn = comm.reduce(sn)
        # S Sn^-1 for grids to analytic
        if rank == 0:
            # sn = lib.einsum('gu,gv->uv', wao_v, wao_v)
            snsgk = scipy.linalg.solve(sn, vk[k])
            ovlp = mol.intor_symmetric('int1e_ovlp')
            vk[k] = numpy.matmul(ovlp, snsgk)
            snsgj = scipy.linalg.solve(sn, vj[k])
            vj[k] = numpy.matmul(ovlp, snsgj)

    if rank == 0:
        vj = numpy.asarray(vj).reshape(dm_shape)
        vk = numpy.asarray(vk).reshape(dm_shape)
    return vj, vk
def get_jk(mol_or_mf, dm, hermi, dmcur):
    """MPI J/K build: J through density fitting, K on a weighted real-space
    grid with shell-pair screening.

    Args:
        mol_or_mf: A ``gto.mole.Mole`` (wrapped as ``hf.SCF(...).view(SCF)``)
            or an object used directly as the mean-field object. It must
            expose ``initsgx`` (``None`` before the first call).
        dm: Density matrix (or stack); reshaped to ``(-1, nao, nao)``. May be
            the string ``'SKIPPED_ARG'``, in which case it is broadcast.
        hermi: When equal to 1, each K matrix is symmetrised on rank 0.
        dmcur: Replaces ``dm`` as the working density on the single call in
            which the grid is switched from level ``grdlvl_i`` to ``grdlvl_f``.

    Returns:
        ``(vj, vk, grdchg)``. On rank 0, ``vj`` and ``vk`` are arrays reshaped
        to the shape of ``dm``; on other ranks they stay lists holding the
        return values of ``comm.reduce``. ``grdchg`` is 1 on the call that
        switched grids, otherwise 0.

    Notes:
        State is kept in module-level globals between calls (``cond``, the
        grid arrays, ``int2c``, ``ovlp``, ``ao_loc``, ``rao_loc``,
        ``ov_scr``). ``cond``: 0 = first call (build the initial grid, then
        2); 2 = initial grid in use; 1 = switch requested (build the final
        grid, then 3).
        ``loop``, ``paux``, ``get_gridss`` and ``getints3c_scr`` are defined
        outside this block.
        The timing messages use the ``print(...)`` call form so that the
        block parses on Python 3, matching the other prints in this function.
    """
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        #dm = mpi.bcast_tagged_array_occdf(dm)
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))

    # initial and final grids level
    grdlvl_i = 0
    grdlvl_f = 1
    # norm_ddm threshold for grids change
    thrd_nddm = 0.01
    # set block size to adapt memory
    sblk = 100
    # interspace between v shell
    intsp = 1
    # threshold for u and v
    gthrdu = 1e-7
    gthrdvs = 1e-4
    gthrdvd = 1e-4

    global cond, wao_vx, ngridsx, coordsx, weightsx

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    # DF-J and sgX set
    mf.with_df = mf
    mol = mf.mol
    global int2c, ovlp, ao_loc, rao_loc, ov_scr
    # need set mf.initsgx = None in scf.SCF __init__
    # One-time setup: the results are stored in module globals.
    if mf.initsgx is None:
        mf.initsgx = 0
        cond = 0
        # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py
        # mf.auxbasis = 'weigend'
        auxbasis = mf.auxbasis
        auxbasis = comm.bcast(auxbasis)
        mf.auxbasis = comm.bcast(mf.auxbasis)
        auxmol = df.addons.make_auxmol(mol, auxbasis)
        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1', comp=1)
        if rank == 0:
            print('auxmol.basis',auxmol.basis,'number of aux basis',int2c.shape[0])
        # for sgX
        # ao_loc and rao_loc
        intbn = mol._add_suffix('int3c2e')
        intbn = gto.moleintor.ascint3(intbn)
        ao_loc = gto.moleintor.make_loc(mol._bas, intbn)
        # rao_loc is the reverse map of ao_loc: AO index -> shell index.
        rao_loc = numpy.zeros((nao),dtype=int)
        ovlp = mol.intor_symmetric('int1e_ovlp')
        for i in range(mol.nbas):
            for jj in range(ao_loc[i],ao_loc[i+1]):
                rao_loc[jj] = i
        # ovlp screening: a shell pair is kept when both shells sit on the
        # same atom or their largest overlap element exceeds gthrdvs.
        ov_scr = numpy.zeros((mol.nbas,mol.nbas),dtype=int)
        for i in range(mol.nbas):
            for j in range(mol.nbas):
                if mol.bas_atom(i) == mol.bas_atom(j):
                    ov_scr[i,j] = 1
                else:
                    movlp = numpy.amax(numpy.absolute(ovlp[ao_loc[i]:ao_loc[i+1],ao_loc[j]:ao_loc[j+1]]))
                    if movlp > gthrdvs:
                        ov_scr[i,j] = 1
        if rank == 0:
            print('thrd_nddm',thrd_nddm, 'sblk',sblk, 'intsp',intsp, 'gthrdu',gthrdu)
        if rank == 0:
            print('gthrdvs',gthrdvs, 'gthrdvd',gthrdvd)

    # coarse and fine grids change
    grdchg = 0
    norm_ddm = 0
    for k in range(nset):
        norm_ddm += numpy.linalg.norm(dms[k])
    # The norm of the incoming matrices themselves is tested, so presumably
    # the caller passes a density *difference* here -- TODO confirm.
    if norm_ddm < thrd_nddm and cond == 2 :
        cond = 1
    if cond == 0:
        wao_vx, ngridsx, coordsx, weightsx = get_gridss(mol,grdlvl_i, sblk)
        if rank == 0:
            print('grids level at first is', grdlvl_i)
        cond = 2
    elif cond == 1:
        wao_vx, ngridsx, coordsx, weightsx = get_gridss(mol,grdlvl_f, sblk)
        if rank == 0:
            print('grids level change to', grdlvl_f)
        # On the grid switch the working density is taken from dmcur instead
        # of dm; nset is not recomputed.
        dms = numpy.asarray(dmcur)
        dms = dms.reshape(-1,nao,nao)
        grdchg = 1
        cond = 3

    # Timing is only tracked (and reported) on rank 0.
    if rank==0:
        Jtime=time.time()

    # DF-J
    # Pack dm + dm.T into lower-triangular storage and halve the diagonal so
    # that the packed vector represents dm itself on the diagonal.
    dmtril = []
    for k in range(nset):
        dmtril.append(lib.pack_tril(dms[k]+dms[k].T))
        i = numpy.arange(nao)
        dmtril[k][i*(i+1)//2+i] *= .5
    # First pass over the 3-index blocks: contract with the packed density,
    # filling this rank's slice (length paux[rank]) block by block.
    rho = []
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            if b0 == 0:
                rho.append(numpy.empty(paux[rank]))
            rho[k][b0:b1] = numpy.dot(eri1, dmtril[k])
        b0 = b1
    # Gather the per-rank slices, solve against the 2-index matrix on rank 0,
    # then scatter the solution back in chunks of size paux.
    orho = []
    rec = []
    for k in range(nset):
        orho.append(mpi.gather(rho[k]))
        if rank == 0:
            ivj0 = scipy.linalg.solve(int2c, orho[k])
        else:
            ivj0 = None
        rec.append(numpy.empty(paux[rank]))
        comm.Scatterv([ivj0,paux],rec[k],root=0)
    # Second pass: contract the fitted coefficients with the same blocks.
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            vj[k] += numpy.dot(rec[k][b0:b1].T, eri1)
        b0 = b1
    for k in range(nset):
        vj[k] = comm.reduce(vj[k])

    if rank==0:
        # Fixed: was a Python 2 print statement (SyntaxError on Python 3).
        print("Took this long for J: ", time.time()-Jtime)
    if rank==0:
        Jtime=time.time()

    # sgX
    wao_v = wao_vx
    coords = coordsx
    weights = weightsx
    for k in range(nset):
        # (A disabled alternative rebuilt ov_scr per density from
        #  max|dm[shell_i, shell_j]| > gthrdvd; it is intentionally not run.)

        # xfg screening: first pass over the grid blocks to find the grid
        # points whose contribution estimate exceeds gthrdu.
        ngrids = coords.shape[0]
        blksize = min(ngrids, sblk)
        gscr = []
        for i0, i1 in lib.prange(0, ngrids, blksize):
            # screening u by value of grids
            umaxg = numpy.amax(numpy.absolute(wao_v[i0:i1]), axis=0)
            usi = numpy.argwhere(umaxg > gthrdu).reshape(-1)
            if len(usi) != 0:
                # screening v by ovlp then triangle matrix bn
                uovl = ovlp[usi, :]
                vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
                osi = numpy.argwhere(vmaxu > gthrdvs).reshape(-1)
                udms = dms[k][usi, :]
                # screening v by dm and ovlp then triangle matrix bn
                dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
                dsi = numpy.argwhere(dmaxg > gthrdvd).reshape(-1)
                vsi = numpy.intersect1d(dsi, osi)
                if len(vsi) != 0:
                    vsh = numpy.unique(rao_loc[vsi])
                    # Expand the surviving shells to their AO indices.
                    vshi = []
                    for i in range(vsh.shape[0]):
                        ista = ao_loc[vsh[i]]
                        iend = ao_loc[vsh[i]+1]
                        vshi.append(numpy.arange(ista, iend))
                    vshi = numpy.asarray(numpy.hstack(vshi))
                    dmsi = dms[k][usi]
                    fg = weights[i0:i1,None] * numpy.dot(wao_v[i0:i1,usi],dmsi[:,vshi])
                    # Per-grid-point estimate: max|Fg| * max|basis value|.
                    gmaxfg = numpy.amax(numpy.absolute(fg), axis=1)
                    gmaxwao_v = numpy.amax(numpy.absolute(wao_v[i0:i1,usi]), axis=1)
                    gmaxtt = gmaxfg * gmaxwao_v
                    gscr0 = numpy.argwhere(gmaxtt > gthrdu).reshape(-1)
                    if gscr0.shape[0] > 0:
                        gscr.append(gscr0 + i0)
        # NOTE(review): numpy.hstack raises on an empty list, i.e. when a
        # rank keeps no grid point at all.
        hgscr = numpy.hstack(gscr).reshape(-1)
        # Collect the surviving grid points and redistribute them evenly.
        # NOTE(review): in the flattened source it is ambiguous whether the
        # splits were guarded by rank == 0; the scattered data is taken from
        # rank 0 here.
        coords = mpi.gather(coords[hgscr])
        wao_v = mpi.gather(wao_v[hgscr])
        weights = mpi.gather(weights[hgscr])
        if rank == 0:
            print('screened grids', coords.shape[0])
            coords = numpy.array_split(coords, mpi.pool.size)
            wao_v = numpy.array_split(wao_v, mpi.pool.size)
            weights = numpy.array_split(weights, mpi.pool.size)
        coords = mpi.scatter(coords)
        wao_v = mpi.scatter(wao_v)
        weights = mpi.scatter(weights)

        # Kuv = Sum(Xug Avt Dkt Xkg)
        ngrids = coords.shape[0]
        for i0, i1 in lib.prange(0, ngrids, blksize):
            # screening u by value of grids
            umaxg = numpy.amax(numpy.absolute(wao_v[i0:i1]), axis=0)
            usi = numpy.argwhere(umaxg > gthrdu).reshape(-1)
            if len(usi) != 0:
                # screening v by ovlp then triangle matrix bn
                uovl = ovlp[usi, :]
                vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
                osi = numpy.argwhere(vmaxu > gthrdvs).reshape(-1)
                udms = dms[k][usi, :]
                # screening v by dm and ovlp then triangle matrix bn
                dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
                dsi = numpy.argwhere(dmaxg > gthrdvd).reshape(-1)
                vsi = numpy.intersect1d(dsi, osi)
                if len(vsi) != 0:
                    vsh = numpy.unique(rao_loc[vsi])
                    # Shell-pair screening mask restricted to the kept shells.
                    vov0 = ov_scr[vsh]
                    vov = vov0[:,vsh]
                    vshi = []
                    xvsh = vsh
                    # ivx: running AO offsets of the kept shells in the
                    # compact (screened) ordering.
                    ivx = [0]
                    vx = 0
                    for i in range(vsh.shape[0]):
                        ista = ao_loc[vsh[i]]
                        iend = ao_loc[vsh[i]+1]
                        vshi.append(numpy.arange(ista, iend))
                        vx += iend - ista
                        ivx.append(vx)
                    vshi = numpy.asarray(numpy.hstack(vshi))
                    nvshi = vshi.shape[0]
                    ivx = numpy.asarray(ivx)
                    vshbeg = vsh[0]
                    vshfin = vsh[-1]+1
                    dmsi = dms[k][usi]
                    fg = weights[i0:i1,None] * numpy.dot(wao_v[i0:i1,usi],dmsi[:,vshi])
                    # Grid points of this block are treated as point charges
                    # and appended to the molecule's integral environment.
                    fakemol = gto.fakemol_for_charges(coords[i0:i1])
                    #pmol = gto.mole.conc_mol(mol, fakemol)
                    intor = mol._add_suffix('int3c2e')
                    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                                      fakemol._atm, fakemol._bas, fakemol._env)
                    shls_slice = (vshbeg, vshfin, vshbeg, vshfin, mol.nbas, mol.nbas+fakemol.nbas)
                    comp=1
                    #aosym='s1'
                    aosym='s2ij'
                    # The original branched on aosym with two identical calls;
                    # a single call is equivalent.
                    gv = getints3c_scr(intor, atm, bas, env, shls_slice, comp,
                                       xvsh, nvshi, ivx, vov, fg, aosym)
                    vk0 = numpy.zeros((nao,nao))
                    vksp = lib.einsum('gu,gv->uv', wao_v[i0:i1,usi], gv)
                    # Scatter the compact (usi x vshi) result into the full
                    # matrix; done through a row copy because chained fancy
                    # indexing would assign into a temporary.
                    vk1 = vk0[usi]
                    vk1[:,vshi] = vksp
                    vk0[usi] = vk1
                    vk[k] += vk0
        # Weighted grid overlap of the basis values, summed over ranks with K.
        wao_vw = weights[:,None] * wao_v
        sn = lib.einsum('gu,gv->uv', wao_v, wao_vw)
        vk[k] = comm.reduce(vk[k])
        sn = comm.reduce(sn)
        # S Sn^-1 for grids to analytic
        if rank == 0:
            snsgk = scipy.linalg.solve(sn, vk[k])
            vk[k] = numpy.matmul(ovlp, snsgk)
            if hermi == 1:
                vk[k] = (vk[k] + vk[k].T)*.5

    if rank == 0:
        # Fixed: was a Python 2 print statement (SyntaxError on Python 3).
        print("Took this long for K: ", time.time()-Jtime)
        vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape)
        vk = numpy.asarray(vk).reshape(dm_shape)
    return vj, vk, grdchg