def _setup_grids_(mf, dm):
    '''Build the DFT grids on rank 0, distribute them over the MPI pool and
    optionally drop grid points that carry negligible electron density.

    Args:
        mf: mean-field object; ``mf.mol``, ``mf.grids``, ``mf._numint``,
            ``mf.small_rho_cutoff`` and ``mf.max_memory`` are read.
        dm: density matrix.  Pruning is attempted only when it is a 2D
            ``numpy.ndarray``.

    Returns:
        ``mf.grids``, whose ``coords``/``weights`` now hold only the segment
        owned by the calling process and whose ``non0tab`` has been rebuilt
        for that segment.
    '''
    mol = mf.mol
    grids = mf.grids

    # Only rank 0 builds the full grid; every process receives one chunk.
    if rank == 0:
        grids.build(with_non0tab=False)
        grids.coords = numpy.array_split(grids.coords, mpi.pool.size)
        grids.weights = numpy.array_split(grids.weights, mpi.pool.size)
    grids.coords = mpi.scatter(grids.coords)
    grids.weights = mpi.scatter(grids.weights)

    ground_state = (isinstance(dm, numpy.ndarray) and dm.ndim == 2)
    if mf.small_rho_cutoff > 1e-20 and ground_state:
        rho = mf._numint.get_rho(mol, dm, grids, mf.max_memory)
        # Integrated electron number over all processes.
        n = comm.allreduce(numpy.dot(rho, grids.weights))
        if abs(n-mol.nelectron) < rks.NELEC_ERROR_TOL*n:
            # The keep-mask is applied to the *local* coords/weights, so it
            # has to be built from local data (a mask built from an array
            # gathered over all ranks would not match the local length).
            # The cutoff is shared out over the global number of grid
            # points so that the result does not depend on the pool size.
            ngrids_total = comm.allreduce(grids.weights.size)
            rw = rho * grids.weights
            idx = abs(rw) > mf.small_rho_cutoff / ngrids_total
            logger.alldebug1(mf, 'Drop grids %d',
                             grids.weights.size - numpy.count_nonzero(idx))
            grids.coords = numpy.asarray(grids.coords[idx], order='C')
            grids.weights = numpy.asarray(grids.weights[idx], order='C')

    # build() was called with with_non0tab=False, so the mask is always
    # generated here for the local (possibly pruned) coordinates.
    grids.non0tab = grids.make_mask(mol, grids.coords)

    return grids
def get_gridss(mol, lvl):
    '''Build level-``lvl`` DFT grids, scatter them over the MPI pool and
    return the locally screened, weighted AO values.

    Args:
        mol: molecule object handed to ``dft.RKS`` and ``eval_ao``.
        lvl: grid level assigned to ``grids.level``.

    Returns:
        Tuple ``(wao_vx, ngridsx, coordsx, gthrd)``: the AO values scaled by
        ``sqrt(|weight|)`` for the retained local grid points, the number of
        retained points, their coordinates, and the screening threshold.
    '''
    mfg = dft.RKS(mol)
    mfg.grids.level = lvl
    grids = mfg.grids
    # Rank 0 builds the full grid and cuts it into one chunk per process.
    if rank == 0:
        grids.build()
        print('ngrids is', grids.coords.shape)
        grids.coords = numpy.array_split(grids.coords, mpi.pool.size)
        grids.weights = numpy.array_split(grids.weights, mpi.pool.size)
    grids.coords = mpi.scatter(grids.coords)
    grids.weights = mpi.scatter(grids.weights)

    coords0 = grids.coords
    weights = grids.weights
    ao_v = dft.numint.eval_ao(mol, coords0)
    # wao = w**0.5 * ao
    wao_v0 = numpy.sqrt(abs(weights)).reshape(-1, 1) * ao_v

    # threshold for Xg and Fg
    gthrd = 1e-10
    if rank == 0:
        print('threshold for grids screening', gthrd)

    # Drop every grid point whose largest |weighted AO value| is below the
    # threshold.  One vectorised row-max replaces the per-point Python loop.
    negligible = numpy.amax(numpy.absolute(wao_v0), axis=1) < gthrd
    keep = ~negligible
    wao_vx = wao_v0[keep]
    coordsx = coords0[keep]
    ngridsx = coordsx.shape[0]
    return wao_vx, ngridsx, coordsx, gthrd
def get_gridss(mol,lvl, sblk):
    '''Create level-``lvl`` grids, hand one chunk to each MPI process and
    evaluate the AOs on the local chunk.

    ``sblk`` is accepted but not used by this variant.

    Returns:
        Tuple ``(ao values, number of local grid points, local coordinates,
        local weights)``.  The AO values are returned unweighted.
    '''
    grids = dft.gen_grid.Grids(mol)
    grids.level = lvl

    # The master builds the whole grid and splits it per process.
    if rank == 0:
        grids.build()
        print('ngrids is',grids.coords.shape)
        nproc = mpi.pool.size
        grids.coords = numpy.array_split(grids.coords, nproc)
        grids.weights = numpy.array_split(grids.weights, nproc)

    local_coords = mpi.scatter(grids.coords)
    local_weights = mpi.scatter(grids.weights)
    grids.coords = local_coords
    grids.weights = local_weights

    ao_values = dft.numint.eval_ao(mol, local_coords)
    return ao_values, local_coords.shape[0], local_coords, local_weights
def get_gridss(mol,lvl, sblk):
    '''Build level-``lvl`` grids, scatter them over the MPI pool and return
    the weighted AO values together with block boundaries for the local grid.

    Args:
        mol: molecule object.
        lvl: grid level.
        sblk: target block size used to subdivide the local grid points.

    Returns:
        Tuple ``(wao_v0, ngrids0, coords0, gridatm)``: AO values scaled by
        ``sqrt(|weight|)`` on the local points, the local point count, the
        local coordinates, and a sorted array of local indices that mark
        block boundaries (it always contains 0 and the local point count).
    '''
    mfg = dft.RKS(mol)
    mfg.grids.level = lvl
    grids = mfg.grids
    # crdnum will hold the global indices of the grid points owned locally.
    crdnum = numpy.asarray([])
    if rank == 0:
        grids.build()
        print('ngrids is',grids.coords.shape)
        crdnum = numpy.array_split(numpy.asarray(range(grids.coords.shape[0])), mpi.pool.size)
        grids.coords = numpy.array_split(grids.coords, mpi.pool.size)
        grids.weights = numpy.array_split(grids.weights, mpi.pool.size)
    crdnum = mpi.scatter(crdnum)
    grids.coords = mpi.scatter(grids.coords)
    grids.weights = mpi.scatter(grids.weights)

    coords0 = mfg.grids.coords
    ngrids0 = coords0.shape[0]
    weights = mfg.grids.weights
    ao_v = dft.numint.eval_ao(mol, coords0)
    # wao=w**0.5 * ao
    xx = numpy.sqrt(abs(weights)).reshape(-1,1)
    wao_v0 = xx*ao_v

    # split grids by atom then by sblk
    mfa = dft.gen_grid.Grids(mol)
    atom_grids_tab = mfa.gen_atomic_grids(mol, level = lvl)
    # Cumulative number of atomic grid points: candidate global boundaries.
    # NOTE(review): assumes the built grid keeps the atom-by-atom ordering
    # and the point counts of gen_atomic_grids -- TODO confirm.
    aaa=0
    gridatm = [0]
    for ia in range(mol.natm):
        coordsa, vol = atom_grids_tab[mol.atom_symbol(ia)]
        aaa += coordsa.shape[0]
        gridatm.append(aaa)
    # Keep only the atom boundaries that fall inside the local index range
    # and shift them so that they index the local arrays.
    gridatm = numpy.intersect1d(crdnum, numpy.asarray(gridatm))
    gridatm -= numpy.asarray([crdnum[0]]*gridatm.shape[0], dtype=int)
    # Always include the start and the end of the local segment.
    gridatm = numpy.unique(numpy.append(gridatm,[0,crdnum.shape[0]]))
    hsblk = sblk // 2
    # Subdivide every segment in steps of sblk.  A step is only taken while
    # more than half a block would remain after it.  New boundaries are
    # appended at the end, so gridatm[ii] and gridatm[ii+1] still refer to
    # the original segment limits while the loop is running.
    for ii in range(gridatm.shape[0]-1):
        i = gridatm[ii]
        while i+sblk < gridatm[ii+1]-hsblk:
            i += sblk
            gridatm = numpy.append(gridatm, i)
    gridatm = numpy.unique(gridatm)
    return wao_v0, ngrids0, coords0, gridatm
def scatter(n, m):
    '''Scatter arrays of different shapes from rank 0 and print the shape
    of the piece received by the calling process.

    Rank 0 prepares one array of ones per process, shaped ``(n + i, m - i)``
    for process ``i``; the other ranks pass ``None``.  ``mpi.INT_MAX`` is
    overridden with 7 beforehand (presumably to exercise the segmented
    transfer path of the mpi tools -- confirm).
    '''
    import numpy
    from mpi4pyscf.tools import mpi
    mpi.INT_MAX = 7

    arrs = None
    if mpi.rank == 0:
        arrs = [numpy.ones((n + shift, m - shift))
                for shift in range(mpi.pool.size)]

    res = mpi.scatter(arrs)
    print(res.shape)
def distribute_amplitudes_(mycc, t1=None, t2=None):
    '''Split the full t2 amplitudes (nocc,nocc,nvir,nvir) along a virtual
    index and hand one segment to each process; t1 is broadcast unchanged.

    On rank 0 missing ``t1``/``t2`` default to ``mycc.t1``/``mycc.t2``.
    On every process ``mycc.t1`` and ``mycc.t2`` are overwritten, and the
    local ``mycc.t2`` segment is returned.
    '''
    _sync_(mycc)
    if rank != 0:
        # Workers only receive.
        t2T = mpi.scatter(None)
        t1 = mpi.bcast(None)
    else:
        t1 = mycc.t1 if t1 is None else t1
        t2 = mycc.t2 if t2 is None else t2
        nvir = t2.shape[2]
        # Bring the virtual indices to the front so that segments can be
        # taken with a plain slice on the first axis.
        t2T = t2.transpose(2, 3, 0, 1)
        bounds = (_task_location(nvir, task_id)
                  for task_id in range(mpi.pool.size))
        t2_all = [t2T[start:stop] for start, stop in bounds]
        t2T = mpi.scatter(t2_all)
        mpi.bcast(t1)

    mycc.t1 = t1
    # Restore the (occ, occ, vir-segment, vir) index order.
    mycc.t2 = t2T.transpose(2, 3, 0, 1)
    return mycc.t2
def distribute_amplitudes_(mycc, t1=None, t2=None):
    '''Distribute the t2 amplitudes tensor (nocc,nocc,nvir,nvir) over the
    MPI processes, one block of the third index per process, and broadcast
    t1 to all of them.

    Returns the local t2 block, which is also stored in ``mycc.t2``.
    '''
    _sync_(mycc)
    perm = (2, 3, 0, 1)  # this permutation is its own inverse

    if rank == 0:
        if t1 is None:
            t1 = mycc.t1
        if t2 is None:
            t2 = mycc.t2
        nvir = t2.shape[2]
        t2_by_vir = t2.transpose(*perm)
        chunks = []
        for proc in range(mpi.pool.size):
            start, stop = _task_location(nvir, proc)
            chunks.append(t2_by_vir[start:stop])
        local_t2T = mpi.scatter(chunks)
        mpi.bcast(t1)
    else:
        local_t2T = mpi.scatter(None)
        t1 = mpi.bcast(None)

    mycc.t1 = t1
    local_t2 = local_t2T.transpose(*perm)
    mycc.t2 = local_t2
    return mycc.t2
def get_jk(mol_or_mf, dm, hermi, dmcur, *args, **kwargs):
    '''MPI version of scf.hf.get_jk function.

    J is evaluated with density fitting and K with a semi-numerical
    (grid based) scheme.  State is kept between calls in module-level
    globals (``cond``, ``wao_vx``, ``ngridsx``, ``coordsx``, ``gthrd``,
    ``int2c``).

    Args:
        mol_or_mf: a ``gto.mole.Mole`` (wrapped into an SCF object) or an
            SCF object used as is.
        dm: density matrix or stack of density matrices; the string
            ``'SKIPPED_ARG'`` on any rank triggers a broadcast.
        hermi: not referenced in this function body.
        dmcur: density matrix used instead of ``dm`` in the call in which
            the grids are switched to the fine level.

    Returns:
        ``(vj, vk, grdchg)``.  On rank 0 ``vj`` and ``vk`` are arrays of the
        shape of ``dm``; on other ranks they are the lists of
        ``comm.reduce`` results.  ``grdchg`` is 1 when the fine grids were
        built in this call, otherwise 0.
    '''
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        dm = mpi.bcast_tagged_array_occdf(dm)

    # Synchronise the attributes of mf across all processes.
    mf.unpack_(comm.bcast(mf.pack()))

    # initial and final grids level
    grdlvl_i = 0
    grdlvl_f = 1
    # norm_ddm threshold for grids change
    thrd_nddm = 0.03
    # set block size to adapt memory
    sblk = 200
    global cond, wao_vx, ngridsx, coordsx, gthrd

    # Flatten the input to a stack of (nao, nao) matrices.
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    # DF-J set
    mf.with_df = mf
    mol = mf.mol
    global int2c
    # use mf.opt to calc int2c once, cond, dm0
    # (mf.opt is None only on the first call, so this block runs once.)
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()
        cond = 0
        # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py
        # mf.auxbasis = 'weigend'
        auxbasis = mf.auxbasis
        auxbasis = comm.bcast(auxbasis)
        mf.auxbasis = comm.bcast(mf.auxbasis)
        auxmol = df.addons.make_auxmol(mol, auxbasis)
        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1', comp=1)
        if rank == 0: print('auxmol.basis',auxmol.basis)

    # coarse and fine grids change
    # cond: 0 = nothing built yet, 2 = coarse grids in use,
    #       1 = switch to fine grids requested, 3 = fine grids in use.
    grdchg = 0
    norm_ddm = 0
    for k in range(nset):
        norm_ddm += numpy.linalg.norm(dms[k])
    if norm_ddm < thrd_nddm and cond == 2 :
        cond = 1
    if cond == 0:
        wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol,grdlvl_i)
        if rank == 0: print('grids level at first is', grdlvl_i)
        cond = 2
    elif cond == 1:
        wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol,grdlvl_f)
        if rank == 0: print('grids level change to', grdlvl_f)
        # After the switch the calculation uses dmcur instead of dm.
        dms = numpy.asarray(dmcur)
        dms = dms.reshape(-1,nao,nao)
        grdchg = 1
        cond = 3

    # DF-J
    # Pack dm + dm.T into lower-triangular storage and halve the diagonal
    # so that a dot product with packed integrals counts each pair once.
    dmtril = []
    for k in range(nset):
        dmtril.append(lib.pack_tril(dms[k]+dms[k].T))
        i = numpy.arange(nao)
        dmtril[k][i*(i+1)//2+i] *= .5
    # First pass over the integral slices: rho_P = sum_ij (P|ij) D_ij.
    # `loop` and `paux` are defined outside this function; paux[rank] is
    # used as the length of the local auxiliary segment.
    rho = []
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        # if rank==0: print('slice-naux',naux,'rank',rank)
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            if b0 == 0: rho.append(numpy.empty(paux[rank]))
            rho[k][b0:b1] = numpy.dot(eri1, dmtril[k])
        b0 = b1
    # Gather rho, solve (P|Q) x = rho on rank 0 and scatter the solution
    # back with per-rank counts paux.
    orho = []
    rec = []
    for k in range(nset):
        orho.append(mpi.gather(rho[k]))
        if rank == 0:
            ivj0 = scipy.linalg.solve(int2c, orho[k])
        else:
            ivj0 = None
        rec.append(numpy.empty(paux[rank]))
        comm.Scatterv([ivj0,paux],rec[k],root=0)
    # Second pass: contract the fitted coefficients with the integrals.
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            vj[k] += numpy.dot(rec[k][b0:b1].T, eri1)
        b0 = b1
    for k in range(nset):
        vj[k] = comm.reduce(vj[k])

    # sgX
    for k in range(nset):
        # screening from Fg: drop grid points whose largest |Fg| element is
        # below gthrd, unless that would drop every local point.
        fg = numpy.dot(wao_vx, dms[k])
        fg0 = numpy.amax(numpy.absolute(fg), axis=1)
        sngds = numpy.argwhere(fg0 < gthrd)
        if sngds.shape[0] < ngridsx:
            wao_v = numpy.delete(wao_vx, sngds, 0)
            fg = numpy.delete(fg, sngds, 0)
            coords = numpy.delete(coordsx, sngds, 0)
        else:
            wao_v = wao_vx
            coords = coordsx
        # rescatter data so that the surviving points are spread evenly
        coords = mpi.gather(coords)
        wao_v = mpi.gather(wao_v)
        fg = mpi.gather(fg)
        if rank == 0:
            coords = numpy.array_split(coords, mpi.pool.size)
            wao_v = numpy.array_split(wao_v, mpi.pool.size)
            fg = numpy.array_split(fg, mpi.pool.size)
        coords = mpi.scatter(coords)
        wao_v = mpi.scatter(wao_v)
        fg = mpi.scatter(fg)

        # Kuv = Sum(Xug Avt Dkt Xkg)
        ngrids = coords.shape[0]
        blksize = min(ngrids, sblk)
        for i0, i1 in lib.prange(0, ngrids, blksize):
            # 3-centre integrals with unit point charges at the grid points.
            fakemol = gto.fakemol_for_charges(coords[i0:i1])
            bn = df.incore.aux_e2(mol, fakemol, intor='int3c2e', aosym='s1', out=None)
            gbn = bn.swapaxes(0,2)
            gv = lib.einsum('gvt,gt->gv', gbn, fg[i0:i1])
            vk[k] += lib.einsum('gu,gv->uv', wao_v[i0:i1], gv)
        # Numerical overlap on the same grid, used for the correction below.
        sn = lib.einsum('gu,gv->uv', wao_v, wao_v)
        vk[k] = comm.reduce(vk[k])
        sn = comm.reduce(sn)
        # SSn^-1 for grids to analytic: K <- S * Sn^-1 * K
        if rank == 0:
            snsgk = scipy.linalg.solve(sn, vk[k])
            ovlp = mol.intor_symmetric('int1e_ovlp')
            vk[k] = numpy.matmul(ovlp, snsgk)

    # Only rank 0 holds the reduced results.
    if rank == 0:
        vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape)
        vk = numpy.asarray(vk).reshape(dm_shape)
    return vj, vk, grdchg
def get_jk(mol_or_mf, dm, hermi, dmcur): #, *args, **kwargs
    '''MPI version of scf.hf.get_jk function.

    J is evaluated with density fitting and K with a screened
    semi-numerical (grid based) scheme.  One-time data and the current
    grids are kept between calls in module-level globals (``cond``,
    ``wao_vx``, ``ngridsx``, ``coordsx``, ``weightsx``, ``int2c``, ``ovlp``,
    ``ao_loc``, ``rao_loc``, ``ov_scr``).

    Args:
        mol_or_mf: a ``gto.mole.Mole`` (wrapped into an SCF object) or an
            SCF object used as is.  The object must provide ``initsgx``
            (``None`` before the first call) and ``auxbasis``.
        dm: density matrix or stack of density matrices; the string
            ``'SKIPPED_ARG'`` on any rank triggers a broadcast.
        hermi: when equal to 1 the K matrix is symmetrised on rank 0.
        dmcur: density matrix used instead of ``dm`` in the call in which
            the grids are switched to the fine level.

    Returns:
        ``(vj, vk, grdchg)``.  On rank 0 ``vj`` and ``vk`` are arrays of the
        shape of ``dm``; on other ranks they are the lists of
        ``comm.reduce`` results.  ``grdchg`` is 1 when the fine grids were
        built in this call, otherwise 0.
    '''
    #vj = get_j(mol_or_mf, dm, hermi)
    #vk = get_k(mol_or_mf, dm, hermi)
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')):
        #dm = mpi.bcast_tagged_array_occdf(dm)
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the attributes of mf across all processes.
    mf.unpack_(comm.bcast(mf.pack()))

    # initial and final grids level
    grdlvl_i = 0
    grdlvl_f = 1
    # norm_ddm threshold for grids change
    thrd_nddm = 0.01
    # set block size to adapt memory
    sblk = 100
    # interspace between v shell (only printed below)
    intsp = 1
    # threshold for u and v
    gthrdu = 1e-7
    gthrdvs = 1e-4
    gthrdvd = 1e-4

    global cond, wao_vx, ngridsx, coordsx, weightsx

    # Flatten the input to a stack of (nao, nao) matrices.
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    vj = [0] * nset
    vk = [0] * nset

    # DF-J and sgX set
    mf.with_df = mf
    mol = mf.mol
    global int2c, ovlp, ao_loc, rao_loc, ov_scr
    # need set mf.initsgx = None in scf.SCF __init_
    # NOTE(review): the source formatting was collapsed; everything up to
    # the two threshold printouts is taken to belong to this one-time
    # initialisation block -- confirm against the original layout.
    if mf.initsgx is None:
        mf.initsgx = 0
        cond = 0
        # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py
        # mf.auxbasis = 'weigend'
        auxbasis = mf.auxbasis
        auxbasis = comm.bcast(auxbasis)
        mf.auxbasis = comm.bcast(mf.auxbasis)
        auxmol = df.addons.make_auxmol(mol, auxbasis)
        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1', comp=1)
        if rank == 0: print('auxmol.basis',auxmol.basis,'number of aux basis',int2c.shape[0])
        # for sgX
        # ao_loc: first AO index of every shell; rao_loc: shell of every AO
        intbn = mol._add_suffix('int3c2e')
        intbn = gto.moleintor.ascint3(intbn)
        ao_loc = gto.moleintor.make_loc(mol._bas, intbn)
        rao_loc = numpy.zeros((nao),dtype=int)
        ovlp = mol.intor_symmetric('int1e_ovlp')
        for i in range(mol.nbas):
            for jj in range(ao_loc[i],ao_loc[i+1]):
                rao_loc[jj] = i
        # ovlp screening: a shell pair is kept when both shells sit on the
        # same atom or when their largest |overlap| element exceeds gthrdvs.
        ov_scr = numpy.zeros((mol.nbas,mol.nbas),dtype=int)
        for i in range(mol.nbas):
            for j in range(mol.nbas):
                if mol.bas_atom(i) == mol.bas_atom(j):
                    ov_scr[i,j] = 1
                else:
                    movlp = numpy.amax(numpy.absolute(ovlp[ao_loc[i]:ao_loc[i+1],ao_loc[j]:ao_loc[j+1]]))
                    if movlp > gthrdvs:
                        ov_scr[i,j] = 1
        if rank == 0: print('thrd_nddm',thrd_nddm, 'sblk',sblk, 'intsp',intsp, 'gthrdu',gthrdu)
        if rank == 0: print('gthrdvs',gthrdvs, 'gthrdvd',gthrdvd)

    # coarse and fine grids change
    # cond: 0 = nothing built yet, 2 = coarse grids in use,
    #       1 = switch to fine grids requested, 3 = fine grids in use.
    grdchg = 0
    norm_ddm = 0
    for k in range(nset):
        norm_ddm += numpy.linalg.norm(dms[k])
    if norm_ddm < thrd_nddm and cond == 2 :
        cond = 1
    if cond == 0:
        wao_vx, ngridsx, coordsx, weightsx = get_gridss(mol,grdlvl_i, sblk)
        if rank == 0: print('grids level at first is', grdlvl_i)
        cond = 2
    elif cond == 1:
        wao_vx, ngridsx, coordsx, weightsx = get_gridss(mol,grdlvl_f, sblk)
        if rank == 0: print('grids level change to', grdlvl_f)
        # After the switch the calculation uses dmcur instead of dm.
        dms = numpy.asarray(dmcur)
        dms = dms.reshape(-1,nao,nao)
        grdchg = 1
        cond = 3

    if rank==0: Jtime=time.time()
    # DF-J
    # Pack dm + dm.T into lower-triangular storage and halve the diagonal
    # so that a dot product with packed integrals counts each pair once.
    dmtril = []
    for k in range(nset):
        dmtril.append(lib.pack_tril(dms[k]+dms[k].T))
        i = numpy.arange(nao)
        dmtril[k][i*(i+1)//2+i] *= .5
    # First pass over the integral slices: rho_P = sum_ij (P|ij) D_ij.
    # `loop` and `paux` are defined outside this function; paux[rank] is
    # used as the length of the local auxiliary segment.
    rho = []
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        #if rank==0: print('slice-naux',naux,'rank',rank)
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            if b0 == 0: rho.append(numpy.empty(paux[rank]))
            rho[k][b0:b1] = numpy.dot(eri1, dmtril[k])
        b0 = b1
    # Gather rho, solve (P|Q) x = rho on rank 0 and scatter the solution
    # back with per-rank counts paux.
    orho = []
    rec = []
    for k in range(nset):
        orho.append(mpi.gather(rho[k]))
        if rank == 0:
            ivj0 = scipy.linalg.solve(int2c, orho[k])
        else:
            ivj0 = None
        rec.append(numpy.empty(paux[rank]))
        comm.Scatterv([ivj0,paux],rec[k],root=0)
    # Second pass: contract the fitted coefficients with the integrals.
    b0 = 0
    for eri1 in loop(mf.with_df):
        naux, nao_pair = eri1.shape
        b1 = b0 + naux
        assert(nao_pair == nao*(nao+1)//2)
        for k in range(nset):
            vj[k] += numpy.dot(rec[k][b0:b1].T, eri1)
        b0 = b1
    for k in range(nset):
        vj[k] = comm.reduce(vj[k])
    # A single pre-formatted argument prints the same text on Python 2 and
    # Python 3 (the former print statement is a syntax error on Python 3).
    if rank==0: print("Took this long for J:  %s" % (time.time()-Jtime))

    if rank==0: Jtime=time.time()
    # sgX
    wao_v = wao_vx
    coords = coordsx
    weights = weightsx
    for k in range(nset):
        '''# Plus density screening
        ov_scr = numpy.zeros((mol.nbas,mol.nbas),dtype=int)
        for i in range(mol.nbas):
            for j in range(mol.nbas):
                movlp = numpy.amax(numpy.absolute(dms[k][ao_loc[i]:ao_loc[i+1],ao_loc[j]:ao_loc[j+1]]))
                if movlp > gthrdvd:
                    ov_scr[i,j] = 1'''
        # xfg screening: collect the local grid points that can contribute.
        ngrids = coords.shape[0]
        blksize = min(ngrids, sblk)
        gscr = []
        for i0, i1 in lib.prange(0, ngrids, blksize):
            # screening u by value of grids
            umaxg = numpy.amax(numpy.absolute(wao_v[i0:i1]), axis=0)
            usi = numpy.argwhere(umaxg > gthrdu).reshape(-1)
            if len(usi) != 0:
                # screening v by ovlp then triangle matrix bn
                uovl = ovlp[usi, :]
                vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
                osi = numpy.argwhere(vmaxu > gthrdvs).reshape(-1)
                udms = dms[k][usi, :]
                # screening v by dm and ovlp then triangle matrix bn
                dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
                dsi = numpy.argwhere(dmaxg > gthrdvd).reshape(-1)
                vsi = numpy.intersect1d(dsi, osi)
                if len(vsi) != 0:
                    # Expand the surviving AOs to whole shells.
                    vsh = numpy.unique(rao_loc[vsi])
                    vshi = []
                    for i in range(vsh.shape[0]):
                        ista = ao_loc[vsh[i]]
                        iend = ao_loc[vsh[i]+1]
                        vshi.append(numpy.arange(ista, iend))
                    vshi = numpy.asarray(numpy.hstack(vshi))
                    dmsi = dms[k][usi]
                    fg = weights[i0:i1,None] * numpy.dot(wao_v[i0:i1,usi],dmsi[:,vshi])
                    # Keep a point when max|Fg| * max|X| exceeds gthrdu.
                    gmaxfg = numpy.amax(numpy.absolute(fg), axis=1)
                    gmaxwao_v = numpy.amax(numpy.absolute(wao_v[i0:i1,usi]), axis=1)
                    gmaxtt = gmaxfg * gmaxwao_v
                    gscr0 = numpy.argwhere(gmaxtt > gthrdu).reshape(-1)
                    if gscr0.shape[0] > 0:
                        gscr.append(gscr0 + i0)
        hgscr = numpy.hstack(gscr).reshape(-1)
        # Re-balance the surviving points over the processes.  The screened
        # arrays replace coords/wao_v/weights and are therefore also the
        # starting point for the next k.
        coords = mpi.gather(coords[hgscr])
        wao_v = mpi.gather(wao_v[hgscr])
        weights = mpi.gather(weights[hgscr])
        if rank == 0:
            print('screened grids', coords.shape[0])
            coords = numpy.array_split(coords, mpi.pool.size)
            wao_v = numpy.array_split(wao_v, mpi.pool.size)
            weights = numpy.array_split(weights, mpi.pool.size)
        coords = mpi.scatter(coords)
        wao_v = mpi.scatter(wao_v)
        weights = mpi.scatter(weights)

        # Kuv = Sum(Xug Avt Dkt Xkg)
        ngrids = coords.shape[0]
        for i0, i1 in lib.prange(0, ngrids, blksize):
            # screening u by value of grids
            umaxg = numpy.amax(numpy.absolute(wao_v[i0:i1]), axis=0)
            usi = numpy.argwhere(umaxg > gthrdu).reshape(-1)
            if len(usi) != 0:
                # screening v by ovlp then triangle matrix bn
                uovl = ovlp[usi, :]
                vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
                osi = numpy.argwhere(vmaxu > gthrdvs).reshape(-1)
                udms = dms[k][usi, :]
                # screening v by dm and ovlp then triangle matrix bn
                dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
                dsi = numpy.argwhere(dmaxg > gthrdvd).reshape(-1)
                vsi = numpy.intersect1d(dsi, osi)
                if len(vsi) != 0:
                    vsh = numpy.unique(rao_loc[vsi])
                    nvsh = vsh.shape[0]
                    # Shell-pair screening table restricted to the kept shells.
                    vov0 = ov_scr[vsh]
                    vov = vov0[:,vsh]
                    vshi = []
                    xvsh = vsh
                    # ivx: running AO offsets of the kept shells.
                    ivx = [0]
                    vx = 0
                    for i in range(vsh.shape[0]):
                        ista = ao_loc[vsh[i]]
                        iend = ao_loc[vsh[i]+1]
                        vshi.append(numpy.arange(ista, iend))
                        vx += iend - ista
                        ivx.append(vx)
                    vshi = numpy.asarray(numpy.hstack(vshi))
                    nvshi = vshi.shape[0]
                    #print('ee',nvshi)
                    ivx = numpy.asarray(ivx)
                    vshbeg = vsh[0]
                    vshfin = vsh[-1]+1
                    dmsi = dms[k][usi]
                    fg = weights[i0:i1,None] * numpy.dot(wao_v[i0:i1,usi],dmsi[:,vshi])
                    # Unit point charges at the grid points act as the third
                    # centre of the 3-centre integrals.
                    fakemol = gto.fakemol_for_charges(coords[i0:i1])
                    #pmol = gto.mole.conc_mol(mol, fakemol)
                    intor = mol._add_suffix('int3c2e')
                    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                                      fakemol._atm, fakemol._bas, fakemol._env)
                    shls_slice = (vshbeg, vshfin, vshbeg, vshfin, mol.nbas, mol.nbas+fakemol.nbas)
                    comp=1
                    #aosym='s1'
                    aosym='s2ij'
                    # Both branches currently issue the same call.
                    if aosym == 's2ij':
                        gv = getints3c_scr(intor, atm, bas, env, shls_slice, comp,
                                           xvsh, nvshi, ivx, vov, fg, aosym)
                    else:
                        gv = getints3c_scr(intor, atm, bas, env, shls_slice, comp,
                                           xvsh, nvshi, ivx, vov, fg, aosym)
                    # Scatter the (usi, vshi) sub-block into a full matrix.
                    vk0 = numpy.zeros((nao,nao))
                    vksp = lib.einsum('gu,gv->uv', wao_v[i0:i1,usi], gv)
                    vk1 = vk0[usi]
                    vk1[:,vshi] = vksp
                    vk0[usi] = vk1
                    vk[k] += vk0
        # Numerical overlap on the same grid, used for the correction below.
        wao_vw = weights[:,None] * wao_v
        sn = lib.einsum('gu,gv->uv', wao_v, wao_vw)
        vk[k] = comm.reduce(vk[k])
        sn = comm.reduce(sn)
        # SSn^-1 for grids to analytic: K <- S * Sn^-1 * K
        if rank == 0:
            snsgk = scipy.linalg.solve(sn, vk[k])
            vk[k] = numpy.matmul(ovlp, snsgk)
            if hermi == 1:
                vk[k] = (vk[k] + vk[k].T)*.5

    # Only rank 0 holds the reduced results.
    if rank == 0:
        print("Took this long for K:  %s" % (time.time()-Jtime))
        vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape)
        vk = numpy.asarray(vk).reshape(dm_shape)
    return vj, vk, grdchg