def _call_rmb_vhf1(mol, dm, key='giao'):
    '''Contract the 'int2e_<key>_sa10sp1spsp2_spinor' and
    'int2e_<key>_sa10sp1_spinor' integrals with the blocks of ``dm``.

    ``dm`` is cut at ``n2c = dm.shape[0] // 2`` into four square blocks
    (dmll, dmls, dmsl, dmss).
    NOTE(review): presumably the large/small component blocks of a
    4-component density matrix -- confirm with the caller.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: square 2D array with an even dimension.
        key: middle part of the integral names (default 'giao').

    Returns:
        (vj, vk): two complex128 arrays of shape (3, 2*n2c, 2*n2c).  Every
        component is returned as ``m + m.T.conj()``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c, :n2c].copy()
    dmls = dm[:n2c, n2c:].copy()
    dmsl = dm[n2c:, :n2c].copy()
    dmss = dm[n2c:, n2c:].copy()
    # numpy.complex was only an alias of the builtin ``complex`` and has been
    # removed from NumPy; complex128 is the dtype it used to resolve to.
    vj = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)
    vk = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)

    # First integral: only the dmss block enters, scaled by c1**4.
    vx = _vhf.rdirect_mapdm('int2e_' + key + '_sa10sp1spsp2_spinor', 's2kl',
                            ('ji->s2kl', 'lk->s1ij', 'jk->s1il', 'li->s1kj'),
                            dmss, 3, mol._atm, mol._bas, mol._env) * c1**4
    # Only the 's2kl' output (index 0) is passed through hermi_triu.
    for i in range(3):
        vx[0, i] = lib.hermi_triu(vx[0, i], 2)
    vj[:, n2c:, n2c:] = vx[0] + vx[1]
    vk[:, n2c:, n2c:] = vx[2] + vx[3]

    # Second integral: one density block per script, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_' + key + '_sa10sp1_spinor', 's2kl',
                            ('lk->s1ij', 'ji->s2kl', 'jk->s1il', 'li->s1kj'),
                            (dmll, dmss, dmsl, dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    # Here the 's2kl' output sits at index 1.
    for i in range(3):
        vx[1, i] = lib.hermi_triu(vx[1, i], 2)
    vj[:, n2c:, n2c:] += vx[0]
    vj[:, :n2c, :n2c] += vx[1]
    vk[:, n2c:, :n2c] += vx[2]
    vk[:, :n2c, n2c:] += vx[3]

    for i in range(3):
        vj[i] = vj[i] + vj[i].T.conj()
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
def test_unpack(self):
    '''Round-trip checks for lib.hermi_triu, lib.pack_tril and
    lib.unpack_tril on random complex matrices, a fixed-seed fingerprint,
    and zero-sized inputs.
    '''
    size = 400
    mat = numpy.random.random((size, size))
    mat = mat + mat * .5j
    # Make the diagonal purely real.
    diag = numpy.arange(size)
    mat[diag, diag] = mat[diag, diag].real

    anti = mat - mat.T.conj()
    pair = numpy.array((anti, anti))

    # hermi=2 must reproduce an anti-Hermitian matrix, transposed or not.
    transposed = pair[0].T
    filled = lib.hermi_triu(transposed, hermi=2, inplace=0)
    self.assertAlmostEqual(abs(transposed - filled).max(), 0, 12)

    filled = lib.hermi_triu(pair[1], hermi=2, inplace=0)
    self.assertAlmostEqual(abs(pair[1] - filled).max(), 0, 12)
    repacked = lib.unpack_tril(lib.pack_tril(filled), 2)
    self.assertAlmostEqual(abs(filled - repacked).max(), 0, 12)

    # hermi=1 must produce a Hermitian matrix.
    filled = lib.hermi_triu(mat, hermi=1, inplace=0)
    self.assertAlmostEqual(abs(filled - filled.T.conj()).max(), 0, 12)

    stacked = numpy.asarray((filled, filled, filled))
    repacked = lib.unpack_tril(lib.pack_tril(stacked))
    self.assertAlmostEqual(abs(stacked - repacked).max(), 0, 12)

    numpy.random.seed(1)
    packed = numpy.random.random((5050, 20))
    self.assertAlmostEqual(lib.fp(lib.unpack_tril(packed, axis=0)),
                           -103.03970592075423, 10)

    # Zero-sized inputs keep a consistent output shape.
    empty = numpy.zeros((5, 0))
    self.assertEqual(lib.unpack_tril(empty, axis=-1).shape, (5, 0, 0))
    empty = numpy.zeros((0, 5))
    self.assertEqual(lib.unpack_tril(empty, axis=0).shape, (0, 0, 5))
def make_h1_soc2e(hfcobj, dm0):
    '''Assemble ``v1`` from get_vxc_soc plus a two-electron 'int2e_p1vxp1'
    contribution built from ``dm0``.

    For a hybrid functional (``abs(hyb) > 1e-10``) both the J-like and the
    K-like terms from uhf_hfc.get_jk are used, the latter scaled by the
    hybrid coefficient; otherwise only the J-like term is evaluated
    directly.

    Args:
        hfcobj: object exposing ``_scf`` (the mean-field object) and
            ``verbose``.
        dm0: density matrices forwarded to the integral contractions.

    Returns:
        The accumulated ``v1`` array, with ``v1[1]`` sign-flipped.
    '''
    mf = hfcobj._scf
    # ``mol`` was read below without ever being bound in this function;
    # take it from the mean-field object that already supplies grids and xc.
    mol = mf.mol
    ni = mf._numint
    hyb = ni.hybrid_coeff(mf.xc, spin=mol.spin)

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, mf.max_memory * .9 - mem_now)
    v1 = get_vxc_soc(ni, mol, mf.grids, mf.xc, dm0,
                     max_memory=max_memory, verbose=hfcobj.verbose)
    if abs(hyb) > 1e-10:
        vj, vk = uhf_hfc.get_jk(mol, dm0)
        v1 += vj[0] + vj[1]
        v1 -= vk * hyb
    else:
        vj = _vhf.direct_mapdm(mol._add_suffix('int2e_p1vxp1'),
                               'a4ij', 'lk->s2ij',
                               dm0, 3, mol._atm, mol._bas, mol._env)
        # Complete each component with the anti-Hermitian fill (hermi=2).
        for i in range(3):
            lib.hermi_triu(vj[0, i], hermi=2, inplace=True)
            lib.hermi_triu(vj[1, i], hermi=2, inplace=True)
        v1 += vj[0] + vj[1]
    v1[1] *= -1
    return v1
def get_j(mol, dm0):
    '''Contract the 'int2e_p1vxp1' integrals with ``dm0`` ('lk->s2ij') and
    complete every component in place with the anti-Hermitian fill.

    Returns:
        The array produced by _vhf.direct_mapdm, indexed below as
        ``vj[spin, component]``.
    '''
    intor_name = mol._add_suffix('int2e_p1vxp1')
    vj = _vhf.direct_mapdm(intor_name, 'a4ij', 'lk->s2ij', dm0, 3,
                           mol._atm, mol._bas, mol._env)
    for comp in range(3):
        for spin in (0, 1):
            lib.hermi_triu(vj[spin, comp], hermi=2, inplace=True)
    return vj
def _call_rmb_vhf1(mol, dm, key='giao'):
    '''Build the three-component ``vj`` and ``vk`` matrices from the
    'int2e_<key>_sa10sp1spsp2_spinor' and 'int2e_<key>_sa10sp1_spinor'
    integrals.

    ``dm`` is split at half its dimension (``n2c``) into the blocks dmll,
    dmls, dmsl and dmss; each contraction result is written into the
    matching quadrant of the output.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: square 2D array with an even dimension.
        key: middle part of the integral names (default 'giao').

    Returns:
        (vj, vk): complex128 arrays of shape (3, 2*n2c, 2*n2c), each
        component returned as ``m + m.T.conj()``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c,:n2c].copy()
    dmls = dm[:n2c,n2c:].copy()
    dmsl = dm[n2c:,:n2c].copy()
    dmss = dm[n2c:,n2c:].copy()
    # numpy.complex (alias of the builtin complex) no longer exists in
    # NumPy; spell out the concrete dtype it stood for.
    vj = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)
    vk = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)

    # dmss-only contraction, scaled by c1**4
    vx = _vhf.rdirect_mapdm('int2e_'+key+'_sa10sp1spsp2_spinor', 's2kl',
                            ('ji->s2kl', 'lk->s1ij', 'jk->s1il', 'li->s1kj'),
                            dmss, 3, mol._atm, mol._bas, mol._env) * c1**4
    for i in range(3):
        # only the 's2kl' result needs the anti-Hermitian fill
        vx[0,i] = lib.hermi_triu(vx[0,i], 2)
    vj[:,n2c:,n2c:] = vx[0] + vx[1]
    vk[:,n2c:,n2c:] = vx[2] + vx[3]

    # one density block per script, scaled by c1**2
    vx = _vhf.rdirect_bindm('int2e_'+key+'_sa10sp1_spinor', 's2kl',
                            ('lk->s1ij', 'ji->s2kl', 'jk->s1il', 'li->s1kj'),
                            (dmll,dmss,dmsl,dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    for i in range(3):
        vx[1,i] = lib.hermi_triu(vx[1,i], 2)
    vj[:,n2c:,n2c:] += vx[0]
    vj[:,:n2c,:n2c] += vx[1]
    vk[:,n2c:,:n2c] += vx[2]
    vk[:,:n2c,n2c:] += vx[3]

    for i in range(3):
        vj[i] = vj[i] + vj[i].T.conj()
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
def get_jk(mol_or_mf=None, dm=None, hermi=1, with_j=True, with_k=True, omega=None):
    '''MPI version of scf.hf.get_jk function'''
    # A bare Mole is wrapped into an SCF view; anything else is used as is.
    is_mol = isinstance(mol_or_mf, gto.mole.Mole)
    mf = hf.SCF(mol_or_mf).view(SCF) if is_mol else mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    skipped_somewhere = any(comm.allgather(dm is mpi.Message.SkippedArg))
    if skipped_somewhere:
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    mf.unpack_(comm.bcast(mf.pack()))
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    if omega is not None:
        # Evaluate under the range-separated Coulomb operator.
        with mf.mol.with_range_coulomb(omega):
            vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)
    else:
        vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)

    if rank == 0:
        # Only the root rank completes vj in place.
        for idx in range(vj.shape[0]):
            lib.hermi_triu(vj[idx], 1, inplace=True)
    out_shape = dm.shape
    return vj.reshape(out_shape), vk.reshape(out_shape)
def _mat_hermi_(vk, hermi): if hermi == 1: if vk.ndim == 2: vk = lib.hermi_triu(vk, hermi) else: for i in range(vk.shape[0]): vk[i] = lib.hermi_triu(vk[i], hermi) return vk
def get_j(mol, dm0):
    '''Evaluate the 'lk->s2ij' contraction of the 'int2e_p1vxp1' integrals
    with ``dm0`` and apply the anti-Hermitian fill (hermi=2) in place to
    every ``vj[s, x]`` with s in (0, 1) and x in range(3).
    '''
    vj = _vhf.direct_mapdm(
        mol._add_suffix('int2e_p1vxp1'),
        'a4ij',
        'lk->s2ij',
        dm0,
        3,
        mol._atm,
        mol._bas,
        mol._env,
    )
    for x in range(3):
        for s in range(2):
            lib.hermi_triu(vj[s, x], hermi=2, inplace=True)
    return vj
def get_jk(mol, dm0):
    '''Contract the 'int2e_p1vxp1' integrals with ``dm0`` into a J-like
    term and a K-like term.

    Returns:
        (vj, vk), where vk is the sum of the 'jk->s1il' and 'li->s1kj'
        contractions.
    '''
    # K_{pq} = (pi|iq) + (iq|pi)
    scripts = ('lk->s2ij', 'jk->s1il', 'li->s1kj')
    vj, vk, vk_other = _vhf.direct_mapdm(mol._add_suffix('int2e_p1vxp1'),
                                         'a4ij', scripts,
                                         dm0, 3, mol._atm, mol._bas, mol._env)
    # Only vj comes back half-filled; complete it anti-Hermitian-wise.
    for comp in range(3):
        for spin in (0, 1):
            lib.hermi_triu(vj[spin, comp], hermi=2, inplace=True)
    vk += vk_other
    return vj, vk
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    '''Build, for every atom in ``atmlst``, the three-component matrix
    ``h1ao + vhf`` from the 'int1e_ipkin', 'int1e_ipnuc', 'int1e_iprinv'
    and 'int2e_ip1' integrals.

    Args:
        mf: mean-field object exposing ``mol``, ``stdout`` and ``verbose``.
        mo_coeff: 2D orbital coefficient array.
        mo_occ: occupations; columns with ``mo_occ > 0`` form the density.
        chkfile: when given, each matrix is saved there under
            'scf_h1ao/<atom id>' instead of being collected in memory.
        atmlst: atom indices to process (default: all atoms of ``mol``).
        verbose: a logger.Logger instance or a verbosity level.

    Returns:
        The list of per-atom matrices when ``chkfile`` is None, otherwise
        ``chkfile`` itself.
    '''
    log = (verbose if isinstance(verbose, logger.Logger)
           else logger.Logger(mf.stdout, mf.verbose))

    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    occupied = mo_coeff[:, mo_occ > 0]
    dm0 = numpy.dot(occupied, occupied.T) * 2

    h1a = -(mol.intor('int1e_ipkin', comp=3)
            + mol.intor('int1e_ipnuc', comp=3))
    offsetdic = mol.offset_nr_by_atom()
    int2e_ip1 = mol._add_suffix('int2e_ip1')
    scripts = ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il')

    h1aos = []
    for ia in atmlst:
        shl0, shl1, p0, p1 = offsetdic[ia]

        # One-electron part: nuclear attraction with origin on atom ia.
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        # Two-electron part, restricted to the shells of atom ia.
        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        dms = (-dm0[:, p0:p1], -dm0, -dm0[:, p0:p1], -dm0)
        vj1, vj2, vk1, vk2 = _vhf.direct_bindm(int2e_ip1, 's2kl', scripts,
                                               dms, 3,
                                               mol._atm, mol._bas, mol._env,
                                               shls_slice=shls_slice)
        for comp in range(3):
            lib.hermi_triu(vj1[comp], 1)
        vhf = vj1 - vk1 * .5
        vhf[:, p0:p1] += vj2 - vk2 * .5
        vhf = vhf + vhf.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + vhf)
        else:
            lib.chkfile.save(chkfile, 'scf_h1ao/%d' % ia, h1ao + vhf)

    return h1aos if chkfile is None else chkfile
def dot_eri_dm(eri, dms, nao_v=None, eri_dot_dm=True):
    '''Contract a 4-fold symmetric, real ``eri`` with one or more density
    matrices whose dimension may differ from that of the output.

    Args:
        eri: 2D double array with ``npair_v * npair_dm`` elements.
        dms: one matrix or a stack of matrices of dimension ``nao_dm``.
        nao_v: dimension of the output matrices (default: ``nao_dm``).
        eri_dot_dm: True selects the 'ijkl,kl->ij' kernels, False the
            'ijkl,ij->kl' kernels.

    Returns:
        An (nao_v, nao_v) array for a single density matrix, otherwise an
        (n_dm, nao_v, nao_v) array.

    Raises:
        RuntimeError: if the size of ``eri`` does not match the two
            dimensions.
    '''
    assert (eri.dtype == numpy.double)
    eri = numpy.asarray(eri, order='C')
    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao_dm = dms_shape[-1]
    if nao_v is None:
        nao_v = nao_dm

    dms = dms.reshape(-1, nao_dm, nao_dm)
    n_dm = dms.shape[0]
    vj = numpy.zeros((n_dm, nao_v, nao_v))

    npair_v = nao_v * (nao_v + 1) // 2
    npair_dm = nao_dm * (nao_dm + 1) // 2
    four_fold = eri.ndim == 2 and npair_v * npair_dm == eri.size
    if not four_fold:
        raise RuntimeError('Array shape not consistent: nao_v %s, DM %s, eri %s'
                           % (nao_v, dms_shape, eri.shape))

    # 4-fold symmetry eri
    if eri_dot_dm:  # 'ijkl,kl->ij'
        fdrv = getattr(_vhf.libcvhf, 'CVHFnrs4_incore_drv_diff_size_v_dm')
        fvj = _vhf._fpointer('CVHFics4_kl_s2ij_diff_size')
    else:  # 'ijkl,ij->kl'
        fdrv = getattr(_vhf.libcvhf, 'CVHFnrs4_incore_drv_diff_size_dm_v')
        fvj = _vhf._fpointer('CVHFics4_ij_s2kl_diff_size')

    def as_ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    # One (input, output, kernel) triple per density matrix.
    dmsptr = [as_ptr(dm) for dm in dms]
    vjkptr = [as_ptr(vj[k]) for k in range(n_dm)]
    fjkptr = [fvj] * n_dm

    n_ops = len(dmsptr)
    ptr_array = ctypes.c_void_p * n_ops
    fdrv(as_ptr(eri),
         ptr_array(*dmsptr), ptr_array(*vjkptr),
         ctypes.c_int(n_ops), ctypes.c_int(nao_v), ctypes.c_int(nao_dm),
         ptr_array(*fjkptr))

    for k in range(n_dm):
        lib.hermi_triu(vj[k], 1, inplace=True)

    out_shape = (nao_v, nao_v) if n_dm == 1 else (n_dm, nao_v, nao_v)
    return vj.reshape(out_shape)
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    '''For each atom in ``atmlst`` build the three-component matrix
    ``h1ao + vhf`` from the 'cint1e_ipkin_sph', 'cint1e_ipnuc_sph',
    'cint1e_iprinv_sph' and 'cint2e_ip1_sph' integrals.

    The result is either collected in a list (``chkfile`` is None) or
    saved to ``chkfile`` under the key 'scf_h1ao/<atom id>'.

    Returns:
        The list of matrices, or ``chkfile`` when one was given.
    '''
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)

    mol = mf.mol
    atoms = range(mol.natm) if atmlst is None else atmlst

    nao, nmo = mo_coeff.shape
    occ_cols = mo_coeff[:,mo_occ>0]
    dm0 = numpy.dot(occ_cols, occ_cols.T) * 2

    kin = mol.intor('cint1e_ipkin_sph', comp=3)
    nuc = mol.intor('cint1e_ipnuc_sph', comp=3)
    h1a = -(kin + nuc)

    offsetdic = mol.offset_nr_by_atom()
    keep_in_memory = chkfile is None
    h1aos = []
    for ia in atoms:
        shl0, shl1, p0, p1 = offsetdic[ia]

        # one-electron contribution centred on atom ia
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        h1ao = h1ao + h1ao.transpose(0,2,1)

        # two-electron contribution from the shells of atom ia
        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        vj1, vj2, vk1, vk2 = _vhf.direct_bindm(
            'cint2e_ip1_sph', 's2kl',
            ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
            (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
            3, mol._atm, mol._bas, mol._env,
            shls_slice=shls_slice)
        for x in range(3):
            lib.hermi_triu(vj1[x], 1)
        vhf = vj1 - vk1*.5
        vhf[:,p0:p1] += vj2 - vk2*.5
        vhf = vhf + vhf.transpose(0,2,1)

        if keep_in_memory:
            h1aos.append(h1ao+vhf)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao+vhf)

    if keep_in_memory:
        return h1aos
    return chkfile
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Selects up to ``np`` determinants with the smallest ``hdiag`` entries
    and builds the Hamiltonian among them.

    Returns:
        (addr, h0): the selected determinant addresses and the matching
        square Hamiltonian block.

    Raises:
        NotImplementedError: if ``norb > 63``.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # fallback for NumPy builds without argpartition
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    # Split the combined address into alpha and beta string addresses.
    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)

    ndet = len(addr)
    h0 = numpy.zeros((ndet, ndet))
    c_ptr = ctypes.c_void_p
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(c_ptr),
                            h1e.ctypes.data_as(c_ptr),
                            eri.ctypes.data_as(c_ptr),
                            stra.ctypes.data_as(c_ptr),
                            strb.ctypes.data_as(c_ptr),
                            ctypes.c_int(norb), ctypes.c_int(ndet))

    # The diagonal comes straight from hdiag.
    diag = numpy.arange(ndet)
    h0[diag, diag] = hdiag[addr]
    h0 = lib.hermi_triu(h0)
    return addr, h0
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Picks up to ``np`` determinants with the smallest ``hdiag`` values and
    returns their addresses together with the Hamiltonian block among
    them.
    '''
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # fallback for NumPy builds without argpartition
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    addra, addrb = divmod(addr, nb)
    alpha_strings = [cistring.addr2str(norb, neleca, ia) for ia in addra]
    beta_strings = [cistring.addr2str(norb, nelecb, ib) for ib in addrb]
    stra = numpy.array(alpha_strings, dtype=numpy.uint64)
    strb = numpy.array(beta_strings, dtype=numpy.uint64)

    ndet = len(addr)
    h0 = numpy.zeros((ndet, ndet))
    c_ptr = ctypes.c_void_p
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(c_ptr),
                            h1e.ctypes.data_as(c_ptr),
                            eri.ctypes.data_as(c_ptr),
                            stra.ctypes.data_as(c_ptr),
                            strb.ctypes.data_as(c_ptr),
                            ctypes.c_int(norb), ctypes.c_int(ndet))

    # Diagonal elements are copied from hdiag.
    diag = numpy.arange(ndet)
    h0[diag, diag] = hdiag[addr]
    h0 = lib.hermi_triu(h0)
    return addr, h0
def _call_giao_vhf1(mol, dm):
    '''Build the three-component ``vj`` and ``vk`` matrices from the
    'int2e_g1_spinor', 'int2e_spgsp1spsp2_spinor', 'int2e_g1spsp2_spinor'
    and 'int2e_spgsp1_spinor' integrals.

    ``dm`` is split at ``n2c = dm.shape[0] // 2`` into the blocks dmll,
    dmls, dmsl and dmss, and every contraction is accumulated into the
    matching quadrant of the output.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: square 2D array with an even dimension.

    Returns:
        (vj, vk): complex128 arrays of shape (3, 2*n2c, 2*n2c).  Each vj
        component is completed by lib.hermi_triu(..., 1); each vk
        component is returned as ``m + m.T.conj()``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c, :n2c].copy()
    dmls = dm[:n2c, n2c:].copy()
    dmsl = dm[n2c:, :n2c].copy()
    dmss = dm[n2c:, n2c:].copy()
    # numpy.complex was only an alias of the builtin ``complex`` and has been
    # removed from NumPy; complex128 is the dtype it used to resolve to.
    vj = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)
    vk = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)

    # dmll with the unscaled integral -> upper-left quadrant.
    vx = _vhf.rdirect_mapdm('int2e_g1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmll, 3,
                            mol._atm, mol._bas, mol._env)
    vj[:, :n2c, :n2c] = vx[0]
    vk[:, :n2c, :n2c] = vx[1]

    # dmss, scaled by c1**4 -> lower-right quadrant.
    vx = _vhf.rdirect_mapdm('int2e_spgsp1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmss, 3,
                            mol._atm, mol._bas, mol._env) * c1**4
    vj[:, n2c:, n2c:] = vx[0]
    vk[:, n2c:, n2c:] = vx[1]

    # Mixed terms, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_g1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmss, dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:, :n2c, :n2c] += vx[0]
    vk[:, :n2c, n2c:] += vx[1]

    vx = _vhf.rdirect_bindm('int2e_spgsp1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmll, dmsl), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:, n2c:, n2c:] += vx[0]
    vk[:, n2c:, :n2c] += vx[1]

    for i in range(3):
        vj[i] = lib.hermi_triu(vj[i], 1)
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
def _call_giao_vhf1(mol, dm):
    '''Assemble the three-component ``vj`` and ``vk`` matrices from four
    spinor integral contractions ('int2e_g1_spinor',
    'int2e_spgsp1spsp2_spinor', 'int2e_g1spsp2_spinor',
    'int2e_spgsp1_spinor') with the blocks of ``dm``.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: square 2D array with an even dimension; it is cut at half its
            size (``n2c``) into dmll, dmls, dmsl and dmss.

    Returns:
        (vj, vk): complex128 arrays of shape (3, 2*n2c, 2*n2c).
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c,:n2c].copy()
    dmls = dm[:n2c,n2c:].copy()
    dmsl = dm[n2c:,:n2c].copy()
    dmss = dm[n2c:,n2c:].copy()
    # numpy.complex (alias of the builtin complex) no longer exists in
    # NumPy; spell out the concrete dtype it stood for.
    vj = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)
    vk = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)

    # unscaled term from dmll
    vx = _vhf.rdirect_mapdm('int2e_g1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmll, 3,
                            mol._atm, mol._bas, mol._env)
    vj[:,:n2c,:n2c] = vx[0]
    vk[:,:n2c,:n2c] = vx[1]

    # c1**4 term from dmss
    vx = _vhf.rdirect_mapdm('int2e_spgsp1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmss, 3,
                            mol._atm, mol._bas, mol._env) * c1**4
    vj[:,n2c:,n2c:] = vx[0]
    vk[:,n2c:,n2c:] = vx[1]

    # c1**2 terms mixing the blocks
    vx = _vhf.rdirect_bindm('int2e_g1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmss,dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:,:n2c,:n2c] += vx[0]
    vk[:,:n2c,n2c:] += vx[1]

    vx = _vhf.rdirect_bindm('int2e_spgsp1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmll,dmsl), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:,n2c:,n2c:] += vx[0]
    vk[:,n2c:,:n2c] += vx[1]

    for i in range(3):
        vj[i] = lib.hermi_triu(vj[i], 1)
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''Build the Hamiltonian among the (at most) ``np`` determinants with
    the smallest ``hdiag`` entries, using separate alpha/beta one-electron
    and aa/ab/bb two-electron integrals.

    Args:
        h1e: pair (alpha, beta) of one-electron integral arrays.
        eri: triple (aa, ab, bb) of two-electron integrals.
        norb: number of orbitals.
        nelec: electron count, unpacked by direct_spin1._unpack_nelec.
        hdiag: 1D array of diagonal Hamiltonian elements.
        np: maximum number of determinants to keep.

    Returns:
        (addr, h0): selected determinant addresses and the corresponding
        square Hamiltonian block.
    '''
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    h1e_a = numpy.ascontiguousarray(h1e[0])
    h1e_b = numpy.ascontiguousarray(h1e[1])
    g2e_aa = ao2mo.restore(1, eri[0], norb)
    g2e_ab = ao2mo.restore(1, eri[1], norb)
    g2e_bb = ao2mo.restore(1, eri[2], norb)
    link_indexa = cistring.gen_linkstr_index_trilidx(range(norb), neleca)
    link_indexb = cistring.gen_linkstr_index_trilidx(range(norb), nelecb)
    nb = link_indexb.shape[0]

    addr = numpy.argsort(hdiag)[:np]
    addra = addr // nb
    addrb = addr % nb
    # numpy.long is missing from NumPy 1.24-1.26 and is a platform-dependent
    # C long in NumPy 2.x.  Use a fixed 64-bit integer so the buffers handed
    # to the C routine have the same layout everywhere.
    # NOTE(review): presumably the C side reads 64-bit strings -- confirm.
    stra = numpy.array([cistring.addr2str(norb,neleca,ia) for ia in addra],
                       dtype=numpy.int64)
    strb = numpy.array([cistring.addr2str(norb,nelecb,ib) for ib in addrb],
                       dtype=numpy.int64)
    np = len(addr)
    h0 = numpy.zeros((np,np))
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # The diagonal is taken directly from hdiag.
    for i in range(np):
        h0[i,i] = hdiag[addr[i]]
    h0 = lib.hermi_triu(h0)
    return addr, h0
def incore(eri, dm, hermi=0):
    '''Contract an in-memory real ``eri`` (4-fold or 8-fold packed) with
    ``dm`` to obtain ``vj`` and ``vk``.

    Args:
        eri: 2D array of ``npair * npair`` elements (4-fold) or 1D array
            of ``npair * (npair + 1) // 2`` elements (8-fold).
        dm: square matrix whose first dimension defines ``nao``.
        hermi: passed to lib.hermi_triu for both outputs when non-zero;
            with 0 only vj is completed (as Hermitian).

    Returns:
        (vj, vk)

    Raises:
        RuntimeError: if the size of ``eri`` matches neither packing.
    '''
    assert(not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao,nao))
    vk = numpy.empty((nao,nao))
    npair = nao*(nao+1)//2

    is_s4 = eri.ndim == 2 and npair*npair == eri.size
    is_s8 = eri.ndim == 1 and npair*(npair+1)//2 == eri.size
    if is_s4:  # 4-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        # 'ijkl,kl->ij'
        fvj = _fpointer('CVHFics4_kl_s2ij')
        # 'ijkl,il->jk'
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif is_s8:  # 8-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        fvj = _fpointer('CVHFics8_tridm_vj')
        vk_kernel = 'CVHFics8_jk_s2il' if hermi == 1 else 'CVHFics8_jk_s1il'
        fvk = _fpointer(vk_kernel)
        # Pack dm + dm.T and halve its diagonal entries.
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        i = numpy.arange(nao)
        tridm[i*(i+1)//2+i] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s'
                           % (dm.shape, eri.shape))

    c_ptr = ctypes.c_void_p
    fdrv(eri.ctypes.data_as(c_ptr),
         tridm.ctypes.data_as(c_ptr),
         vj.ctypes.data_as(c_ptr),
         dm.ctypes.data_as(c_ptr),
         vk.ctypes.data_as(c_ptr),
         ctypes.c_int(nao), fvj, fvk)

    if hermi == 0:
        vj = lib.hermi_triu(vj, 1)
    else:
        vj = lib.hermi_triu(vj, hermi)
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
def incore(eri, dm, hermi=0):
    '''Compute ``vj`` and ``vk`` from a real, in-memory ``eri`` stored with
    4-fold (2D) or 8-fold (1D) symmetry and the matrix ``dm``.

    When ``hermi`` is non-zero both results go through lib.hermi_triu with
    that flag; otherwise only vj is completed, with flag 1.

    Raises:
        RuntimeError: if ``eri`` has neither of the two expected sizes.
    '''
    assert (not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao, nao))
    vk = numpy.empty((nao, nao))
    npair = nao * (nao + 1) // 2

    def ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    if eri.ndim == 2 and npair * npair == eri.size:
        # 4-fold symmetry eri: 'ijkl,kl->ij' and 'ijkl,il->jk'
        fdrv = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size:
        # 8-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        fvj = _fpointer('CVHFics8_tridm_vj')
        if hermi == 1:
            fvk = _fpointer('CVHFics8_jk_s2il')
        else:
            fvk = _fpointer('CVHFics8_jk_s1il')
        # packed dm + dm.T with the diagonal entries halved
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        rows = numpy.arange(nao)
        tridm[rows * (rows + 1) // 2 + rows] *= .5
    else:
        message = 'Array shape not consistent: DM %s, eri %s' % (dm.shape, eri.shape)
        raise RuntimeError(message)

    fdrv(ptr(eri), ptr(tridm), ptr(vj), ptr(dm), ptr(vk),
         ctypes.c_int(nao), fvj, fvk)

    if hermi == 0:
        return lib.hermi_triu(vj, 1), vk
    vj = lib.hermi_triu(vj, hermi)
    vk = lib.hermi_triu(vk, hermi)
    return vj, vk
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Selects up to ``np`` determinants with the smallest ``hdiag`` entries
    and builds the Hamiltonian among them.

    Args:
        h1e: real one-electron integral matrix.
        eri: real two-electron integrals (restored to 4-index form here).
        norb: number of orbitals (at most 63).
        nelec: electron count, unpacked by _unpack_nelec.
        hdiag: diagonal Hamiltonian elements; computed with make_hdiag
            when omitted.
        np: maximum number of determinants to keep.

    Returns:
        (addr, h0): the selected determinant addresses and the matching
        square Hamiltonian block.

    Raises:
        NotImplementedError: if ``norb > 63`` or either integral array is
            complex.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    # The original test compared dtypes against numpy.complex, an alias that
    # has been removed from NumPy (AttributeError on every call).
    # iscomplexobj rejects every complex dtype, none of which the real-valued
    # C kernel below can take.
    if numpy.iscomplexobj(h1e) or numpy.iscomplexobj(eri):
        raise NotImplementedError('Complex Hamiltonian')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            addr = numpy.argpartition(hdiag, np - 1)[:np].copy()
        except AttributeError:
            # fallback for NumPy builds without argpartition
            addr = numpy.argsort(hdiag)[:np].copy()
    # Split the combined address into alpha and beta string addresses.
    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    np = len(addr)
    h0 = numpy.zeros((np, np))
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                            h1e.ctypes.data_as(ctypes.c_void_p),
                            eri.ctypes.data_as(ctypes.c_void_p),
                            stra.ctypes.data_as(ctypes.c_void_p),
                            strb.ctypes.data_as(ctypes.c_void_p),
                            ctypes.c_int(norb), ctypes.c_int(np))

    HERMITIAN_THRESHOLD = 1e-10
    if (abs(h1e - h1e.T).max() < HERMITIAN_THRESHOLD and
        abs(eri - eri.transpose(1, 0, 3, 2)).max() < HERMITIAN_THRESHOLD):
        # symmetric Hamiltonian
        h0 = lib.hermi_triu(h0)
    else:
        # Fill the upper triangular part: rerun the same kernel on the
        # transposed integrals with h0 viewed in Fortran order.
        h0 = numpy.asarray(h0, order='F')
        h1e = numpy.asarray(h1e.T, order='C')
        eri = numpy.asarray(eri.transpose(1, 0, 3, 2), order='C')
        libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e.ctypes.data_as(ctypes.c_void_p),
                                eri.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # The diagonal comes straight from hdiag.
    idx = numpy.arange(np)
    h0[idx, idx] = hdiag[addr]
    return addr, h0
def get_jk(mol_or_mf, dm, hermi=1):
    '''MPI version of scf.hf.get_jk function'''
    # A bare Mole is wrapped into an SCF view; anything else is used as is.
    if isinstance(mol_or_mf, gto.mole.Mole):
        mf = hf.SCF(mol_or_mf).view(SCF)
    else:
        mf = mol_or_mf

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    dm_was_skipped = isinstance(dm, str) and dm == 'SKIPPED_ARG'
    if any(comm.allgather(dm_was_skipped)):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks.
    packed_state = comm.bcast(mf.pack())
    mf.unpack_(packed_state)
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)

    if rank == 0:
        # Only the root rank completes vj in place.
        for idx in range(vj.shape[0]):
            lib.hermi_triu(vj[idx], 1, inplace=True)
    target_shape = dm.shape
    return vj.reshape(target_shape), vk.reshape(target_shape)
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Selects up to ``np`` determinants with the smallest ``hdiag`` entries
    and builds the Hamiltonian among them.  If the integrals are not
    symmetric within 1e-10, the second triangle is computed explicitly
    instead of being mirrored.

    Returns:
        (addr, h0)

    Raises:
        NotImplementedError: if ``norb > 63``.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # fallback for NumPy builds without argpartition
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    ndet = len(addr)

    def fill_tril(h0_buf, h1e_buf, eri_buf):
        # Run the C kernel on the given buffers (written into h0_buf).
        c_ptr = ctypes.c_void_p
        libfci.FCIpspace_h0tril(h0_buf.ctypes.data_as(c_ptr),
                                h1e_buf.ctypes.data_as(c_ptr),
                                eri_buf.ctypes.data_as(c_ptr),
                                stra.ctypes.data_as(c_ptr),
                                strb.ctypes.data_as(c_ptr),
                                ctypes.c_int(norb), ctypes.c_int(ndet))

    h0 = numpy.zeros((ndet, ndet))
    fill_tril(h0, h1e, eri)

    HERMITIAN_THRESHOLD = 1e-10
    h1e_symmetric = abs(h1e - h1e.T).max() < HERMITIAN_THRESHOLD
    if (h1e_symmetric and
            abs(eri - eri.transpose(1,0,3,2)).max() < HERMITIAN_THRESHOLD):
        # symmetric Hamiltonian
        h0 = lib.hermi_triu(h0)
    else:
        # Fill the upper triangular part
        h0 = numpy.asarray(h0, order='F')
        h1e = numpy.asarray(h1e.T, order='C')
        eri = numpy.asarray(eri.transpose(1,0,3,2), order='C')
        fill_tril(h0, h1e, eri)

    diag = numpy.arange(ndet)
    h0[diag, diag] = hdiag[addr]
    return addr, h0
def _get_jk(mol, intor, comp, aosym, script_dms, shls_slice=None, cintopt=None, vhfopt=None):
    '''Run _vhf.direct_bindm for alternating (script, dm) entries of
    ``script_dms`` and complete every square result whose script mentions
    's2' (Hermitian fill) or 'a2' (anti-Hermitian fill).

    Returns:
        The sequence of results from _vhf.direct_bindm, one per script.
    '''
    intor = mol._add_suffix(intor)
    # Even positions hold the scripts, odd positions the density matrices.
    scripts = script_dms[::2]
    dms = script_dms[1::2]
    vs = _vhf.direct_bindm(intor, aosym, scripts, dms, comp,
                           mol._atm, mol._bas, mol._env,
                           vhfopt=vhfopt, cintopt=cintopt,
                           shls_slice=shls_slice)
    for k, script in enumerate(scripts):
        if 's2' in script:
            hermi = 1
        elif 'a2' in script:
            hermi = 2
        else:
            continue

        nrow, ncol = vs[k].shape[-2:]
        if nrow != ncol:
            # Rectangular results cannot be mirrored.
            continue

        if comp > 1:
            for ic in range(comp):
                lib.hermi_triu(vs[k][ic], hermi=hermi, inplace=True)
        else:
            lib.hermi_triu(vs[k], hermi=hermi, inplace=True)
    return vs
def test_unpack(self):
    '''Round-trip checks for lib.hermi_triu, lib.pack_tril and
    lib.unpack_tril, plus a fixed-seed fingerprint of unpack_tril.
    '''
    size = 400
    mat = numpy.random.random((size,size))
    mat = mat+mat*.5j
    # Make the diagonal purely real.
    diag = numpy.arange(size)
    mat[diag,diag] = mat[diag,diag].real

    anti = mat-mat.T.conj()
    pair = numpy.array((anti,anti))

    # hermi=2 must reproduce an anti-Hermitian matrix, transposed or not.
    transposed = pair[0].T
    filled = lib.hermi_triu(transposed, hermi=2, inplace=0)
    self.assertAlmostEqual(abs(transposed-filled).max(), 0, 12)

    filled = lib.hermi_triu(pair[1], hermi=2, inplace=0)
    self.assertAlmostEqual(abs(pair[1]-filled).max(), 0, 12)
    roundtrip = lib.unpack_tril(lib.pack_tril(filled), 2)
    self.assertAlmostEqual(abs(filled - roundtrip).max(), 0, 12)

    # hermi=1 must produce a Hermitian matrix.
    filled = lib.hermi_triu(mat, hermi=1, inplace=0)
    self.assertAlmostEqual(abs(filled-filled.T.conj()).max(), 0, 12)

    stacked = numpy.asarray((filled,filled,filled))
    roundtrip = lib.unpack_tril(lib.pack_tril(stacked))
    self.assertAlmostEqual(abs(stacked - roundtrip).max(), 0, 12)

    numpy.random.seed(1)
    packed = numpy.random.random((5050,20))
    self.assertAlmostEqual(lib.finger(lib.unpack_tril(packed, axis=0)),
                           -103.03970592075423, 10)
def _get_jk(mol, intor, comp, aosym, script_dms, shls_slice=None, cintopt=None):
    '''Evaluate _vhf.direct_bindm for the (script, dm) pairs interleaved in
    ``script_dms`` and fill in the square outputs whose script asks for
    's2' (hermi=1) or 'a2' (hermi=2) symmetry.

    Returns:
        The sequence of results from _vhf.direct_bindm, one per script.
    '''
    intor = mol._add_suffix(intor)
    scripts = script_dms[::2]
    dms = script_dms[1::2]
    vs = _vhf.direct_bindm(intor, aosym, scripts, dms, comp,
                           mol._atm, mol._bas, mol._env,
                           cintopt=cintopt, shls_slice=shls_slice)
    for k, script in enumerate(scripts):
        # Pick the fill mode from the script; skip scripts without one.
        if 's2' in script:
            hermi = 1
        elif 'a2' in script:
            hermi = 2
        else:
            continue

        shape = vs[k].shape
        is_square = shape[-2] == shape[-1]
        if not is_square:
            continue

        if comp > 1:
            for ic in range(comp):
                lib.hermi_triu(vs[k][ic], hermi=hermi, inplace=True)
        else:
            lib.hermi_triu(vs[k], hermi=hermi, inplace=True)
    return vs
def intor(mol):
    '''Assemble an (nao, nao) matrix from the shell-pair blocks returned by
    type1_by_shell and type2_by_shell.

    Only shell pairs with ``jsh <= ish`` are evaluated; the result is then
    completed by lib.hermi_triu.
    '''
    nao = mol.nao_nr()
    mat = numpy.zeros((nao,nao))
    row0 = 0
    for ish in range(mol.nbas):
        col0 = 0
        for jsh in range(ish+1):
            shell_pair = (ish, jsh)
            # Both contributions land in the same block of the matrix.
            for by_shell in (type1_by_shell, type2_by_shell):
                buf = by_shell(mol, shell_pair)
                di, dj = buf.shape
                mat[row0:row0+di, col0:col0+dj] += buf
            col0 += dj
        row0 += di
    return lib.hermi_triu(mat)
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''Build the Hamiltonian among the (at most) ``np`` determinants with
    the smallest ``hdiag`` entries, using separate alpha/beta one-electron
    and aa/ab/bb two-electron integrals.

    Args:
        h1e: pair (alpha, beta) of one-electron integral arrays.
        eri: triple (aa, ab, bb) of two-electron integrals.
        norb: number of orbitals.
        nelec: electron count, unpacked by direct_spin1._unpack_nelec.
        hdiag: diagonal Hamiltonian elements; computed with make_hdiag
            when omitted.
        np: maximum number of determinants to keep.

    Returns:
        (addr, h0): selected determinant addresses and the corresponding
        square Hamiltonian block.
    '''
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    h1e_a = numpy.ascontiguousarray(h1e[0])
    h1e_b = numpy.ascontiguousarray(h1e[1])
    g2e_aa = ao2mo.restore(1, eri[0], norb)
    g2e_ab = ao2mo.restore(1, eri[1], norb)
    g2e_bb = ao2mo.restore(1, eri[2], norb)
    link_indexa = cistring.gen_linkstr_index_trilidx(range(norb), neleca)
    link_indexb = cistring.gen_linkstr_index_trilidx(range(norb), nelecb)
    nb = link_indexb.shape[0]

    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            addr = numpy.argpartition(hdiag, np - 1)[:np]
        except AttributeError:
            # fallback for NumPy builds without argpartition
            addr = numpy.argsort(hdiag)[:np]
    addra = addr // nb
    addrb = addr % nb
    # numpy.long is missing from NumPy 1.24-1.26 and is a platform-dependent
    # C long in NumPy 2.x.  Use a fixed 64-bit integer so the buffers handed
    # to the C routine have the same layout everywhere.
    # NOTE(review): presumably the C side reads 64-bit strings -- confirm.
    stra = numpy.array([cistring.addr2str(norb, neleca, ia) for ia in addra],
                       dtype=numpy.int64)
    strb = numpy.array([cistring.addr2str(norb, nelecb, ib) for ib in addrb],
                       dtype=numpy.int64)
    np = len(addr)
    h0 = numpy.zeros((np, np))
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # The diagonal is taken directly from hdiag.
    for i in range(np):
        h0[i, i] = hdiag[addr[i]]
    h0 = lib.hermi_triu(h0)
    return addr, h0
def _jk_triu_(vj, vk, hermi):
    '''Complete ``vj`` (and, for non-zero ``hermi``, ``vk``) with
    lib.hermi_triu.

    With ``hermi == 0`` only vj is processed, using flag 1.  Otherwise
    both are processed with ``hermi``.  2D inputs are replaced by the
    returned matrix; other ranks are updated slice by slice along the
    first axis.

    Returns:
        (vj, vk)
    '''
    also_vk = hermi != 0
    flag = hermi if also_vk else 1

    if vj.ndim == 2:
        vj = lib.hermi_triu(vj, flag)
        if also_vk:
            vk = lib.hermi_triu(vk, flag)
        return vj, vk

    for idx in range(vj.shape[0]):
        vj[idx] = lib.hermi_triu(vj[idx], flag)
        if also_vk:
            vk[idx] = lib.hermi_triu(vk[idx], flag)
    return vj, vk
def runjks2(dm1, ncomp, intorname, filldot, *namejk):
    '''Call runjk with the same arguments and return a list with every
    result passed through lib.hermi_triu(..., 1).
    '''
    completed = []
    for mat in runjk(dm1, ncomp, intorname, filldot, *namejk):
        completed.append(lib.hermi_triu(mat, 1))
    return completed
def test_direct_jk_s2(self):
    '''Compare the s2-type direct J/K kernels (8-fold, 4-fold, 2kl, 2ij
    and s1 integral symmetries) against in-core and einsum references for
    a random symmetric density matrix.
    '''
    def check(expected, got):
        self.assertTrue(numpy.allclose(expected, got))

    numpy.random.seed(15)
    dm1 = numpy.random.random((nao, nao))
    dm1 = dm1 + dm1.T

    # 8-fold kernels against the in-core reference.
    vj0, vk0 = scf._vhf.incore(rhf._eri, dm1, 1)
    vj1, vk1 = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs8",
                       "CVHFnrs8_ji_s2kl", "CVHFnrs8_jk_s2il")
    check(vj0, vj1)
    check(vk0, vk1)

    # From here on the references are plain einsum contractions.
    eri1 = ao2mo.restore(1, rhf._eri, nao)
    vj0 = numpy.einsum("ijkl,kl->ij", eri1, dm1)
    vk0 = numpy.einsum("ijkl,jk->il", eri1, dm1)

    # 4-fold kernels: J and K in one call.
    vj1, vj2 = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                       "CVHFnrs4_ji_s2kl", "CVHFnrs4_jk_s2il")
    check(vj0, vj1)
    check(vk0, vj2)

    # 4-fold kernels: two K variants in one call.
    vj1, vk1 = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                       "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il")
    check(vk0, vj1)
    check(vk0, vk1)

    # 4-fold kernels: four K outputs in one call.
    vk0 = numpy.einsum("ijkl,jk->il", eri1, dm1)
    vk1 = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                  "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il",
                  "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il")
    for idx in range(4):
        check(vk0, vk1[idx])

    # Lower symmetries: two J outputs followed by two K outputs.
    vj0 = numpy.einsum("ijkl,kl->ij", eri1, dm1)
    vk0 = numpy.einsum("ijkl,jk->il", eri1, dm1)
    kernel_sets = (
        ("CVHFdot_nrs2kl",
         "CVHFnrs2kl_ji_s2kl", "CVHFnrs2kl_lk_s2ij",
         "CVHFnrs2kl_jk_s2il", "CVHFnrs2kl_li_s2kj"),
        ("CVHFdot_nrs2ij",
         "CVHFnrs2ij_ji_s2kl", "CVHFnrs2ij_lk_s2ij",
         "CVHFnrs2ij_jk_s2il", "CVHFnrs2ij_li_s2kj"),
        ("CVHFdot_nrs1",
         "CVHFnrs1_ji_s2kl", "CVHFnrs1_lk_s2ij",
         "CVHFnrs1_jk_s2il", "CVHFnrs1_li_s2kj"),
    )
    for names in kernel_sets:
        vk1 = runjks2(dm1, 1, "cint2e_sph", *names)
        check(vj0, vk1[0])
        check(vj0, vk1[1])
        check(vk0, vk1[2])
        check(vk0, vk1[3])

    # 8-fold kernels again, this time completing the raw runjk output here.
    vj0, vk0 = scf._vhf.incore(rhf._eri, dm1, 1)
    vj1, vk1 = runjk(dm1, 1, "cint2e_sph", "CVHFdot_nrs8",
                     "CVHFnrs8_ji_s2kl", "CVHFnrs8_jk_s2il")
    vj1 = lib.hermi_triu(vj1, 1)
    vk1 = lib.hermi_triu(vk1, 1)
    check(vj0, vj1)
    check(vk0, vk1)
def get_jk_favorj(sgx, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):
    '''Grid-based (sgX) evaluation of the J and K matrices for one or more
    density matrices.

    Args:
        sgx: object exposing ``mol``, ``grids``, ``grids_thrd``, ``debug``,
            ``max_memory`` and ``blockdim``.
        dm: one density matrix or a stack of them; the last dimension
            defines ``nao``.
        hermi: when 1 (and ``with_k`` is set) vk is symmetrised as
            ``(vk + vk^T) / 2``.
        with_j, with_k: select which of the two matrices are accumulated.
        direct_scf_tol: forwarded to _gen_jk_direct.

    Returns:
        (vj, vk), both reshaped to the shape of ``dm``.

    Side effects:
        ``mol._bvv`` is overwritten inside the grid loop with the shell
        list that survives the screening.
    '''
    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time counter that replaces it.
    t0 = time.process_time(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol)

    # for basis set to shell
    intor = mol._add_suffix('int3c2e')
    fakemol = gto.fakemol_for_charges(grids.coords)
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      fakemol._atm, fakemol._bas, fakemol._env)
    ao_loc = moleintor.make_loc(bas, intor)
    # rao_loc[j] is the index of the shell that basis function j belongs to.
    rao_loc = numpy.zeros((nao), dtype=int)
    for i in range(mol.nbas):
        for j in range(ao_loc[i], ao_loc[i + 1]):
            rao_loc[j] = i

    # Numerical overlap on the grid, accumulated block by block.
    sn = numpy.zeros((nao, nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk, max_memory * 1e6 / 8 / nao**2))))
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]
        sn += lib.dot(ao.T, wao)

    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            gmaxfg = numpy.amax(numpy.absolute(fg[i]), axis=1)
            gmaxwao_v = numpy.amax(numpy.absolute(ao), axis=1)
            gmaxtt = gmaxfg * gmaxwao_v
            mask |= numpy.any(gmaxtt > 1e-7)
            mask |= numpy.any(gmaxtt < -1e-7)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        # screening u by value of grids
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            udms = proj_dm[0][usi, :]
            # screening v by dm and ovlp then triangle matrix bn
            dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
            dsi = numpy.argwhere(dmaxg > 1e-4).reshape(-1)
            vsi = numpy.intersect1d(dsi, osi)
            if len(vsi) != 0:
                vsh = numpy.unique(rao_loc[vsi])
                mol._bvv = vsh

        # screening u by value of grids
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            if len(osi) != 0:
                vsh = numpy.unique(rao_loc[osi])
                #print(vsh.shape,'eew',vsh)
                mol._bvv = vsh

        # Recompute fg on the (possibly reduced) grid block and drop the
        # points where every entry is within +-gthrd.
        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i] > gthrd, axis=1)
            mask |= numpy.any(fg[i] < -gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        # tnuc accumulates the (CPU, wall) time spent in the integral calls.
        if sgx.debug:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            jpart, gv = batch_jk(mol, coords, rhog, fg)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])

    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1)) * .5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def intor_cross(intor, cell1, cell2, comp=1, hermi=0, kpts=None, kpt=None):
    r'''1-electron integrals between the basis functions of two cells

    .. math::

        \langle \mu | intor | \nu \rangle, \mu \in cell1, \nu \in cell2

    Kwargs:
        comp : int
            Number of integral components.
        hermi : int
            When non-zero the 's2' fill routine is used and each component
            is completed with :func:`lib.hermi_triu`.
        kpts, kpt :
            ``kpts`` is reshaped to (-1, 3); when it is None, ``kpt`` is
            reshaped to (1, 3); when both are None a single zero k-point
            is used.

    Returns:
        A list with one matrix per k-point, or a single matrix when
        ``kpts`` is None or has shape (3,).  The component axis is dropped
        when ``comp == 1`` and the real part is returned for k-points whose
        absolute components sum to less than 1e-9.
    '''
    intor = moleintor.ascint3(intor)

    if kpts is not None:
        kpts_lst = np.reshape(kpts, (-1, 3))
    elif kpt is not None:
        kpts_lst = np.reshape(kpt, (1, 3))
    else:
        kpts_lst = np.zeros((1, 3))
    nkpts = len(kpts_lst)

    # Merge the integral environments of both cells so that the shells of
    # cell2 follow the shells of cell1.
    atm, bas, env = conc_env(cell1._atm, cell1._bas, cell1._env,
                             cell2._atm, cell2._bas, cell2._env)
    atm = np.asarray(atm, dtype=np.int32)
    bas = np.asarray(bas, dtype=np.int32)
    env = np.asarray(env, dtype=np.double)
    natm = len(atm)
    nbas = len(bas)

    shls_slice = (0, cell1.nbas, cell1.nbas, nbas)
    ish0, ish1, jsh0, jsh1 = shls_slice
    ao_loc = moleintor.make_loc(bas, intor)
    ni = ao_loc[ish1] - ao_loc[ish0]
    nj = ao_loc[jsh1] - ao_loc[jsh0]
    out = np.empty((nkpts, comp, ni, nj), dtype=np.complex128)

    aosym = 's1' if hermi == 0 else 's2'
    fill = getattr(libpbc, 'PBCnr2c_fill_k' + aosym)
    fintor = getattr(moleintor.libcgto, intor)
    intopt = lib.c_null_ptr()

    Ls = cell1.get_lattice_Ls(rcut=max(cell1.rcut, cell2.rcut))
    expkL = np.asarray(np.exp(1j * np.dot(kpts_lst, Ls.T)), order='C')

    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    drv = libpbc.PBCnr2c_drv
    drv(fintor, fill, _ptr(out),
        ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(len(Ls)),
        _ptr(Ls), _ptr(expkL),
        (ctypes.c_int * 4)(*(shls_slice[:4])),
        _ptr(ao_loc), intopt,
        _ptr(atm), ctypes.c_int(natm),
        _ptr(bas), ctypes.c_int(nbas),
        _ptr(env))

    mat = []
    for k, kvec in enumerate(kpts_lst):
        v = out[k]
        if hermi != 0:
            for ic in range(comp):
                lib.hermi_triu(v[ic], hermi=hermi, inplace=True)
        if comp == 1:
            v = v[0]
        if abs(kvec).sum() < 1e-9:  # gamma_point
            v = v.real
        mat.append(v)

    single_kpt = kpts is None or np.shape(kpts) == (3, )
    return mat[0] if single_kpt else mat
def make_hdiag_csf_slower(h1e, eri, norb, nelec, transformer, hdiag_det=None):
    ''' Diagonal of the Hamiltonian in the CSF basis.

    This is tricky because I need the diagonal blocks for each configuration
    in order to get the correct csf hdiag values, not just the diagonal
    elements for each determinant.

    Args:
        h1e, eri : one- and two-electron integrals; ``eri`` is converted with
            ``ao2mo.restore(1, eri, norb)`` before use
        norb : number of orbitals
        nelec : electron count accepted by ``_unpack_nelec``
        transformer : object providing ``smult`` and ``csd_mask``

    Kwargs:
        hdiag_det : determinant-basis diagonal; computed with
            ``make_hdiag_det`` when None

    Returns:
        1D float64 array with one entry per CSF
    '''
    smult = transformer.smult
    if hdiag_det is None:
        hdiag_det = make_hdiag_det(None, h1e, eri, norb, nelec)
    eri = ao2mo.restore(1, eri, norb)
    neleca, nelecb = _unpack_nelec(nelec)
    (min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size,
     npair_sdet_size) = get_csdaddrs_shape(norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape(
        norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = min(neleca, nelecb)
    ncsf_all = count_all_csfs(norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings(norb, nelecb)

    hdiag_csf = np.zeros(ncsf_all, dtype=np.float64)
    # Tracks which CSF entries have not been written yet.  The builtin bool is
    # used because the np.bool alias is missing from NumPy 1.24 - 1.26.
    hdiag_csf_check = np.ones(ncsf_all, dtype=bool)

    for npair in range(min_npair, max_npair+1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2*npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        det_addr = transformer.csd_mask[csd_offset:][:nconf*ndet]

        if ndet == 1:
            # Closed-shell singlets: one determinant per configuration, so
            # the CSF diagonal is the determinant diagonal.
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue

        umat = get_spin_evecs(nspin, neleca, nelecb, smult)
        det_addra, det_addrb = divmod(det_addr, ndetb_all)
        det_stra = cistring.addrs2str(norb, neleca, det_addra).reshape(
            nconf, ndet, order='C')
        det_strb = cistring.addrs2str(norb, nelecb, det_addrb).reshape(
            nconf, ndet, order='C')
        det_addr = det_addr.reshape(nconf, ndet, order='C')
        diag_idx = np.diag_indices(ndet)

        hdiag_conf = np.zeros((nconf, ndet, ndet), dtype=np.float64)
        # NOTE (from the original author's profiling): the library call below
        # is usually responsible for about 50% of the time spent here.
        for iconf in range(nconf):
            stra = det_stra[iconf]
            strb = det_strb[iconf]
            libfci.FCIpspace_h0tril(
                hdiag_conf[iconf].ctypes.data_as(ctypes.c_void_p),
                h1e.ctypes.data_as(ctypes.c_void_p),
                eri.ctypes.data_as(ctypes.c_void_p),
                stra.ctypes.data_as(ctypes.c_void_p),
                strb.ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(norb), ctypes.c_int(ndet))
            # Complete the triangular block, then overwrite its diagonal with
            # the determinant diagonal elements.
            hdiag_conf[iconf] = lib.hermi_triu(hdiag_conf[iconf])
            hdiag_conf[iconf][diag_idx] = hdiag_det[det_addr[iconf]]

        # diag(U^T H U) for every configuration block
        hdiag_conf = np.tensordot(hdiag_conf, umat, axes=1)
        hdiag_conf = (hdiag_conf * umat[np.newaxis, :, :]).sum(1)
        hdiag_csf[csf_offset:][:nconf*ncsf] = hdiag_conf.ravel(order='C')
        hdiag_csf_check[csf_offset:][:nconf*ncsf] = False

    n_missing = np.count_nonzero(hdiag_csf_check)
    assert (n_missing == 0), n_missing
    return hdiag_csf
def _eval_jk(mf, dm, hermi, gen_jobs):
    '''Evaluate J/K-type matrices block by block over MPI processes.

    Args:
        mf : mean-field object providing ``mol``, ``opt`` and
            ``init_direct_scf``
        dm : density matrix or matrices, reshaped to (-1, nao, nao)
        hermi : int
            Passed to ``gen_jobs`` and, when non-zero, to
            :func:`lib.hermi_triu` on rank 0
        gen_jobs : callable ``gen_jobs(ngroups, hermi)`` returning the job
            list.  Each job is ``(group_ids, recipe_0, recipe_1, ...)`` and
            every recipe is a sequence of 4-index tuples ``rec``:
            ``rec[0], rec[1]`` select the density-matrix block and
            ``rec[2], rec[3]`` select the output block.

    Returns:
        Array of shape (n_recipes, n_dm, nao, nao).  On ranks other than 0
        the array is zeroed before it is returned.
    '''
    cpu0 = (logger.process_clock(), logger.perf_counter())
    mol = mf.mol
    ao_loc = mol.ao_loc_nr()
    nao = ao_loc[-1]

    bas_groups = _partition_bas(mol)
    jobs = gen_jobs(len(bas_groups), hermi)
    njobs = len(jobs)
    logger.debug1(mf, 'njobs %d', njobs)

    # Each job has multiple recipes.
    n_recipes = len(jobs[0][1:])
    dm = numpy.asarray(dm).reshape(-1, nao, nao)
    n_dm = dm.shape[0]
    vk = numpy.zeros((n_recipes, n_dm, nao, nao))

    if mf.opt is None:
        vhfopt = mf.init_direct_scf(mol)
    else:
        vhfopt = mf.opt
    # Assign the entire dm_cond to vhfopt.
    # The prescreen function CVHFnrs8_prescreen will index q_cond and dm_cond
    # over the entire basis.  "set_dm" in function jk.get_jk/direct_bindm only
    # creates a subblock of dm_cond which is not compatible with
    # CVHFnrs8_prescreen.
    vhfopt.set_dm(dm, mol._atm, mol._bas, mol._env)
    # Then skip the "set_dm" initialization in function jk.get_jk/direct_bindm.
    vhfopt._dmcondname = None

    logger.timer_debug1(mf, 'get_jk initialization', *cpu0)
    for job_id in mpi.work_stealing_partition(range(njobs)):
        group_ids = jobs[job_id][0]
        recipes = jobs[job_id][1:]

        shls_slice = lib.flatten([bas_groups[i] for i in group_ids])
        loc = ao_loc[shls_slice].reshape(4, 2)

        # Input blocks, ordered as (density matrix, recipe, rec).
        dm_blks = []
        for i_dm in range(n_dm):
            for recipe in recipes:
                for rec in recipe:
                    p0, p1 = loc[rec[0]]
                    q0, q1 = loc[rec[1]]
                    dm_blks.append(dm[i_dm, p0:p1, q0:q1])
        scripts = [
            'ijkl,%s%s->%s%s' % tuple(['ijkl'[x] for x in rec])
            for recipe in recipes for rec in recipe
        ] * n_dm

        kparts = jk.get_jk(mol, dm_blks, scripts, shls_slice=shls_slice,
                           vhfopt=vhfopt)

        # kparts comes back in the same order as dm_blks.  An explicit cursor
        # is used to walk it instead of re-slicing the list with a loop index
        # that leaks out of the inner loop.
        cursor = 0
        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for rec in recipe:
                    p0, p1 = loc[rec[2]]
                    q0, q1 = loc[rec[3]]
                    vk[ir, i_dm, p0:p1, q0:q1] += kparts[cursor]
                    cursor += 1

    vk = mpi.reduce(vk)
    if rank == 0:
        if hermi:
            for i in range(n_recipes):
                for j in range(n_dm):
                    lib.hermi_triu(vk[i, j], hermi, inplace=True)
    else:
        # Zero out vk on workers. If reduce(get_jk()) is called twice,
        # non-zero vk on workers can cause error.
        vk[:] = 0
    logger.timer(mf, 'get_jk', *cpu0)
    return vk
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False):
    '''Build J and K matrices with the ``CVHFnr_direct_drv`` driver.

    Args:
        dms : a 2D density matrix or a sequence of density matrices
        atm, bas, env : integral environment arrays

    Kwargs:
        vhfopt : optional optimizer object; when given, its ``_this``,
            ``_cintopt`` and ``_intor`` attributes are used and ``set_dm`` is
            called on it
        hermi : int
            1 selects the 's2kj' exchange kernel, any other value the 's1kj'
            kernel; a non-zero value is also passed to :func:`lib.hermi_triu`
            for K
        cart : bool
            Selects 'int2e_cart' instead of 'int2e_sph' when ``vhfopt`` is
            None

    Returns:
        Array of shape (2, n_dm, nao, nao) holding J (index 0) and K
        (index 1), or (2, nao, nao) when there is a single density matrix.
    '''
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = dms[numpy.newaxis, :, :]
    n_dm = len(dms)
    nao = dms[0].shape[0]
    dms = numpy.asarray(dms, order='C')

    if vhfopt is not None:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    else:
        intor = 'int2e_cart' if cart else 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    cintor = _fpointer(intor)

    fdrv = getattr(libcvhf, 'CVHFnr_direct_drv')
    fdot = _fpointer('CVHFdot_nrs8')
    fvj = _fpointer('CVHFnrs8_ji_s2kl')
    fvk = _fpointer('CVHFnrs8_li_s2kj' if hermi == 1 else 'CVHFnrs8_li_s1kj')

    # Slots [0, n_dm) drive the J build, slots [n_dm, 2*n_dm) the K build.
    n_ops = 2 * n_dm
    vjk = numpy.empty((2, n_dm, nao, nao))
    fjk = (ctypes.c_void_p * n_ops)()
    dmsptr = (ctypes.c_void_p * n_ops)()
    vjkptr = (ctypes.c_void_p * n_ops)()
    for idm in range(n_dm):
        for part, kernel in enumerate((fvj, fvk)):
            slot = part * n_dm + idm
            dmsptr[slot] = dms[idm].ctypes.data_as(ctypes.c_void_p)
            vjkptr[slot] = vjk[part, idm].ctypes.data_as(ctypes.c_void_p)
            fjk[slot] = kernel

    shls_slice = (ctypes.c_int * 8)(*([0, c_bas.shape[0]] * 4))
    ao_loc = make_loc(bas, intor)

    fdrv(cintor, fdot, fjk, dmsptr, vjkptr,
         ctypes.c_int(n_ops), ctypes.c_int(1),
         shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p),
         cintopt, cvhfopt,
         c_atm.ctypes.data_as(ctypes.c_void_p), natm,
         c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
         c_env.ctypes.data_as(ctypes.c_void_p))

    # vj must be symmetric
    for idm in range(n_dm):
        vjk[0, idm] = lib.hermi_triu(vjk[0, idm], 1)
    # vk depends on the requested symmetry
    if hermi != 0:
        for idm in range(n_dm):
            vjk[1, idm] = lib.hermi_triu(vjk[1, idm], hermi)

    if n_dm == 1:
        vjk = vjk.reshape(2, nao, nao)
    return vjk
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False,
           with_j=True, with_k=True):
    '''Build J and/or K matrices with the ``CVHFnr_direct_drv`` driver.

    Args:
        dms : density matrix or stack of density matrices; the last axis
            gives ``nao`` and the input is viewed as (-1, nao, nao)
        atm, bas, env : integral environment arrays

    Kwargs:
        vhfopt : optional optimizer object; when given, its ``_this``,
            ``_cintopt`` and ``_intor`` attributes are used and ``set_dm`` is
            called on it
        hermi : int
            1 selects the 's2kj' exchange kernel, any other value the 's1kj'
            kernel; a non-zero value is also passed to :func:`lib.hermi_triu`
            for K
        cart : bool
            Selects 'int2e_cart' instead of 'int2e_sph' when ``vhfopt`` is
            None
        with_j, with_k : bool
            Which matrices to compute

    Returns:
        Tuple ``(vj, vk)``; each has the shape of ``dms`` or is None when it
        was not requested.
    '''
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao = dms_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    if vhfopt is not None:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    else:
        intor = 'int2e_cart' if cart else 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    cintor = _fpointer(intor)

    fdrv = getattr(libcvhf, 'CVHFnr_direct_drv')
    fdot = _fpointer('CVHFdot_nrs8')

    # One (kernel, input pointer, output pointer) triple per contraction.
    vj = vk = None
    kernels = []
    in_ptrs = []
    out_ptrs = []

    if with_j:
        fvj = _fpointer('CVHFnrs8_ji_s2kl')
        vj = numpy.empty((n_dm, nao, nao))
        in_ptrs.extend(dm.ctypes.data_as(ctypes.c_void_p) for dm in dms)
        out_ptrs.extend(vj[i].ctypes.data_as(ctypes.c_void_p)
                        for i in range(n_dm))
        kernels.extend([fvj] * n_dm)

    if with_k:
        fvk = _fpointer('CVHFnrs8_li_s2kj' if hermi == 1
                        else 'CVHFnrs8_li_s1kj')
        vk = numpy.empty((n_dm, nao, nao))
        in_ptrs.extend(dm.ctypes.data_as(ctypes.c_void_p) for dm in dms)
        out_ptrs.extend(vk[i].ctypes.data_as(ctypes.c_void_p)
                        for i in range(n_dm))
        kernels.extend([fvk] * n_dm)

    shls_slice = (ctypes.c_int * 8)(*([0, c_bas.shape[0]] * 4))
    ao_loc = make_loc(bas, intor)
    n_ops = len(in_ptrs)
    comp = 1
    void_p_array = ctypes.c_void_p * n_ops
    fdrv(cintor, fdot,
         void_p_array(*kernels), void_p_array(*in_ptrs),
         void_p_array(*out_ptrs),
         ctypes.c_int(n_ops), ctypes.c_int(comp),
         shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p),
         cintopt, cvhfopt,
         c_atm.ctypes.data_as(ctypes.c_void_p), natm,
         c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
         c_env.ctypes.data_as(ctypes.c_void_p))

    if with_j:
        # vj must be symmetric
        for j_mat in vj:
            lib.hermi_triu(j_mat, 1, inplace=True)
        vj = vj.reshape(dms_shape)
    if with_k:
        if hermi != 0:
            for k_mat in vk:
                lib.hermi_triu(k_mat, hermi, inplace=True)
        vk = vk.reshape(dms_shape)
    return vj, vk
def get_jk(mols, dms, scripts=['ijkl,ji->kl'], intor='int2e_sph', aosym='s1',
           comp=None, hermi=0, shls_slice=None, verbose=logger.WARN,
           vhfopt=None):
    '''Compute J/K matrices for the given density matrix

    Args:
        mols : an instance of :class:`Mole` or a list of `Mole` objects

        dms : ndarray or list of ndarrays
            A density matrix or a list of density matrices

    Kwargs:
        hermi : int
            Whether the returned J (K) matrix is hermitian

            | 0 : no hermitian or symmetric
            | 1 : hermitian
            | 2 : anti-hermitian

        intor : str
            2-electron integral name.  See :func:`getints` for the complete
            list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 's8' : 8-fold symmetry (as used in the examples below)
            | 4 or '4' or 's4': 4-fold symmetry
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry (default)
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        scripts : string or a list of strings
            Contraction description (following numpy.einsum convention) based
            on letters [ijkl].  Each script will be one-to-one applied to each
            entry of dms.  So it must have the same number of elements as the
            dms, len(scripts) == len(dms).
        shls_slice : 8-element list
            (ish_start, ish_end, jsh_start, jsh_end, ksh_start, ksh_end,
            lsh_start, lsh_end).  The input is not modified.

    Returns:
        Depending on the number of density matrices, the function returns one
        J/K matrix or a list of J/K matrices (the same number of entries as the
        input dms).
        Each JK matrices may be a 2D array or 3D array if the AO integral
        has multiple components.

    Examples:

    >>> from pyscf import gto
    >>> mol = gto.M(atom='H 0 -.5 0; H 0 .5 0', basis='cc-pvdz')
    >>> nao = mol.nao_nr()
    >>> dm = numpy.random.random((nao,nao))
    >>> # Default, Coulomb matrix
    >>> vj = get_jk(mol, dm)
    >>> # Coulomb matrix with 8-fold permutation symmetry for AO integrals
    >>> vj = get_jk(mol, dm, 'ijkl,ji->kl', aosym='s8')
    >>> # Exchange matrix with 8-fold permutation symmetry for AO integrals
    >>> vk = get_jk(mol, dm, 'ijkl,jk->il', aosym='s8')
    >>> # Compute coulomb and exchange matrices together
    >>> vj, vk = get_jk(mol, (dm,dm), ('ijkl,ji->kl','ijkl,li->kj'), aosym='s8')
    >>> # Analytical gradients for coulomb matrix
    >>> j1 = get_jk(mol, dm, 'ijkl,lk->ij', intor='int2e_ip1_sph', aosym='s2kl', comp=3)

    >>> # contraction across two molecules
    >>> mol1 = gto.M(atom='He 2 0 0', basis='6-31g')
    >>> nao1 = mol1.nao_nr()
    >>> dm1 = numpy.random.random((nao1,nao1))
    >>> # Coulomb interaction between two molecules, note 4-fold symmetry can be applied
    >>> jcross = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', aosym='s4')
    >>> ecoul = numpy.einsum('ij,ij', jcross, dm1)
    >>> # Exchange interaction between two molecules, no symmetry can be used
    >>> kcross = get_jk((mol1,mol,mol,mol1), dm, scripts='ijkl,jk->il')
    >>> ex = numpy.einsum('ij,ji', kcross, dm1)

    >>> # Analytical gradients for coulomb matrix between two molecules
    >>> jcros1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3)
    >>> # Analytical gradients for coulomb interaction between 1s density and the other molecule
    >>> jpart1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3,
    ...                 shls_slice=(0,1,0,1,0,mol.nbas,0,mol.nbas))
    '''
    if isinstance(mols, (tuple, list)):
        intor, comp = gto.moleintor._get_intor_and_comp(mols[0]._add_suffix(intor), comp)
        assert(len(mols) == 4)
        assert(mols[0].cart == mols[1].cart == mols[2].cart == mols[3].cart)
        if shls_slice is None:
            shls_slice = numpy.array([(0, mol.nbas) for mol in mols])
        else:
            # numpy.array (not asarray) forces a private copy: the offsets
            # below are added in place and must not leak into the caller's
            # array.
            shls_slice = numpy.array(shls_slice).reshape(4, 2)

        # concatenate unique mols and build corresponding shls_slice
        mol_ids = [id(mol) for mol in mols]
        atm, bas, env = mols[0]._atm, mols[0]._bas, mols[0]._env
        bas_start = numpy.zeros(4, dtype=int)
        for m in range(1, 4):
            first = mol_ids.index(mol_ids[m])
            if first == m:  # the unique mol, not repeated in mols
                bas_start[m] = bas.shape[0]
                atm, bas, env = gto.conc_env(atm, bas, env, mols[m]._atm,
                                             mols[m]._bas, mols[m]._env)
            else:
                bas_start[m] = bas_start[first]
            shls_slice[m] += bas_start[m]
        shls_slice = shls_slice.flatten()
    else:
        intor, comp = gto.moleintor._get_intor_and_comp(mols._add_suffix(intor), comp)
        atm, bas, env = mols._atm, mols._bas, mols._env
        if shls_slice is None:
            shls_slice = (0, mols.nbas) * 4

    single_script = isinstance(scripts, str)
    if single_script:
        scripts = [scripts]
    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = [dms]
    assert(len(scripts) == len(dms))

    # Format scripts: keep an explicit symmetry prefix on the output indices,
    # otherwise choose s1/s2 from hermi.
    descript = []
    for script in scripts:
        dmsym, vsym = script.lower().split(',')[1].split('->')
        if vsym[:2] in ('a2', 's2', 's1'):
            descript.append(dmsym + '->' + vsym)
        elif hermi == 0:
            descript.append(dmsym + '->s1' + vsym)
        else:
            descript.append(dmsym + '->s2' + vsym)

    vs = _vhf.direct_bindm(intor, aosym, descript, dms, comp, atm, bas, env,
                           vhfopt=vhfopt, shls_slice=shls_slice)
    if hermi != 0:
        for v in vs:
            if v.ndim == 3:
                for vi in v:
                    lib.hermi_triu(vi, hermi, inplace=True)
            else:
                lib.hermi_triu(v, hermi, inplace=True)

    if single_script:
        vs = vs[0]
    return vs
def get_jk_favorj(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    '''Grid-based J/K builder ("sgX J/K builder" in the timing log).

    Args:
        sgx : object providing ``mol``, ``grids``, ``grids_thrd``, ``debug``,
            ``max_memory``, ``blockdim`` and ``_opt``
        dm : density matrix or stack of density matrices

    Kwargs:
        hermi : int
            When 1 (and ``with_k``), K is symmetrized as (K + K^T)/2
        with_j, with_k : bool
            Which contributions are accumulated
        direct_scf_tol : float
            Forwarded to ``_gen_jk_direct``

    Returns:
        Tuple ``(vj, vk)``, each reshaped to the shape of ``dm``.  A matrix
        that was not requested is returned as zeros.
    '''
    def _clocks():
        # time.clock() was removed in Python 3.8; process_time() is the
        # CPU-time replacement.  Wall time stays on time.time().
        return time.process_time(), time.time()

    t0 = _clocks()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)

    # Numerical overlap accumulated over the grid
    sn = numpy.zeros((nao, nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]
        sn += lib.dot(ao.T, wao)

    # Project the density matrices with sn^{-1} * ovlp
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0  # accumulated (CPU, wall) time spent in the integral calls
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        # Drop grid points where every |fg| entry is within gthrd
        mask = numpy.zeros(i1-i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i] > gthrd, axis=1)
            mask |= numpy.any(fg[i] < -gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        if sgx.debug:
            tic = _clocks()
            gbn = batch_nuc(mol, coords)
            toc = _clocks()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tic = _clocks()
            if with_j:
                rhog = rhog.copy()
            jpart, gv = batch_jk(mol, coords, rhog, fg.copy())
            toc = _clocks()
        tnuc = tnuc[0] + (toc[0] - tic[0]), tnuc[1] + (toc[1] - tic[1])

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])

    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    '''Assemble, for every atom in ``atmlst``, a 3-component (x, y, z) AO
    matrix from derivative integrals and the XC response on the grid.

    For each atom the result is the sum of

    * a one-electron part built from 'int1e_ipkin', 'int1e_ipnuc' and
      'int1e_iprinv' (the latter with the origin placed on the atom),
    * a two-electron part from 'int2e_ip1' contracted with the density
      matrix (exchange is included, scaled by ``hyb``, only when
      ``abs(hyb) > 1e-10``),
    * an LDA or GGA exchange-correlation part evaluated block-wise on
      ``mf.grids``.

    Args:
        mf : mean-field object providing ``mol``, ``_numint``, ``xc``,
            ``grids``, ``stdout`` and ``verbose``
        mo_coeff : 2D array; columns with ``mo_occ > 0`` form the density
            matrix ``dm0 = 2 * C_occ C_occ^T``
        mo_occ : occupation numbers

    Kwargs:
        chkfile : when not None, each matrix is saved with
            ``lib.chkfile.save`` under the key 'scf_h1ao/<atom index>'
        atmlst : atoms to process; defaults to ``range(mol.natm)``
        verbose : a ``logger.Logger`` or a verbosity level

    Returns:
        A list with one matrix per atom when ``chkfile`` is None, otherwise
        ``chkfile``.

    Raises:
        NotImplementedError: for XC types other than 'LDA' and 'GGA'.
    '''
    # NOTE(review): `log` is assigned but not used further in this function.
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    mocc = mo_coeff[:, mo_occ > 0]
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching the XC backend below does not
    # modify mf._numint.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to the libxc backend
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    # Hard-coded value handed to ni.block_loop
    max_memory = 4000

    h1a = -(mol.intor('int1e_ipkin', comp=3) + mol.intor('int1e_ipnuc', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for i0, ia in enumerate(atmlst):
        # shl0:shl1 / p0:p1 are the shell and AO ranges of atom ia
        shl0, shl1, p0, p1 = offsetdic[ia]

        # One-electron part
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        # Two-electron part; the first shell index is restricted to atom ia
        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        int2e_ip1 = mol._add_suffix('int2e_ip1')
        if abs(hyb) > 1e-10:
            vj1, vj2, vk1, vk2 = \
                _vhf.direct_bindm(int2e_ip1, 's2kl',
                                  ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                  (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                  3, mol._atm, mol._bas, mol._env,
                                  shls_slice=shls_slice)
            # vj1 was requested with 's2kl' output; the return value is
            # ignored, so this presumably completes vj1[i] in place -- TODO
            # confirm against lib.hermi_triu's default for `inplace`.
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb * .5 * vk1
            veff[:, p0:p1] += vj2 - hyb * .5 * vk2
        else:
            # No exact exchange: Coulomb terms only
            vj1, vj2 = \
                _vhf.direct_bindm(int2e_ip1, 's2kl',
                                  ('ji->s2kl', 'lk->s1ij'),
                                  (-dm0[:,p0:p1], -dm0),
                                  3, mol._atm, mol._bas, mol._env,
                                  shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1
            veff[:, p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                # First-order density from the AO gradients on atom ia
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:, :, p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight * frr * rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:, :, p0:p1], weight * vrho)
                aow[:, :, p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                # Drop references to the block-sized temporaries
                half = aow = aow1 = None

        elif xctype == 'GGA':
            def get_wv(rho, rho1, weight, vxc, fxc):
                # Weighted response vector (4, ngrid) built from the density
                # `rho`, its first-order change `rho1` and the XC derivatives.
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4, ngrid))
                wv[0] = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:] = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv

            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1, XX, XY, XZ], :, p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2, YX, YY, YZ], :, p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3, ZX, ZY, ZZ], :, p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:, p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[2], dm0[:, p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[3], dm0[:, p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)

                # wv[0] is halved because transpose_sum adds the matrix to
                # its own transpose.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # Terms with the first-order XC potential weights acting on
                # the AO derivatives of atom ia
                wv = numpy.empty_like(rho)
                wv[0] = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0, p0:p1] -= lib.dot(ao[1, :, p0:p1].T.copy(), aow)
                veff[1, p0:p1] -= lib.dot(ao[2, :, p0:p1].T.copy(), aow)
                veff[2, p0:p1] -= lib.dot(ao[3, :, p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX, XY, XZ], :, p0:p1], wv[1:4])
                veff[0, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX, YY, YZ], :, p0:p1], wv[1:4])
                veff[1, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX, ZY, ZZ], :, p0:p1], wv[1:4])
                veff[2, p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        veff = veff + veff.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao + veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile
def get_jk(mols, dms, scripts=['ijkl,ji->kl'], intor='int2e_sph', aosym='s1',
           comp=None, hermi=0, shls_slice=None, verbose=logger.WARN,
           vhfopt=None):
    '''Compute J/K matrices for the given density matrix

    Args:
        mols : an instance of :class:`Mole` or a list of `Mole` objects

        dms : ndarray or list of ndarrays
            A density matrix or a list of density matrices

    Kwargs:
        hermi : int
            Whether the returned J (K) matrix is hermitian

            | 0 : no hermitian or symmetric
            | 1 : hermitian
            | 2 : anti-hermitian

        intor : str
            2-electron integral name.  See :func:`getints` for the complete
            list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 's8' : 8-fold symmetry (as used in the examples below)
            | 4 or '4' or 's4': 4-fold symmetry
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry (default)
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        scripts : string or a list of strings
            Contraction description (following numpy.einsum convention) based
            on letters [ijkl].  Each script will be one-to-one applied to each
            entry of dms.  So it must have the same number of elements as the
            dms, len(scripts) == len(dms).  Scripts written with other index
            letters are translated to ijkl using the first four characters of
            each script.
        shls_slice : 8-element list
            (ish_start, ish_end, jsh_start, jsh_end, ksh_start, ksh_end,
            lsh_start, lsh_end).  The input is not modified.

    Returns:
        Depending on the number of density matrices, the function returns one
        J/K matrix or a list of J/K matrices (the same number of entries as the
        input dms).
        Each JK matrices may be a 2D array or 3D array if the AO integral
        has multiple components.

    Raises:
        RuntimeError: if a script still contains unsupported characters after
            the translation to ijkl.

    Examples:

    >>> from pyscf import gto
    >>> mol = gto.M(atom='H 0 -.5 0; H 0 .5 0', basis='cc-pvdz')
    >>> nao = mol.nao_nr()
    >>> dm = numpy.random.random((nao,nao))
    >>> # Default, Coulomb matrix
    >>> vj = get_jk(mol, dm)
    >>> # Coulomb matrix with 8-fold permutation symmetry for AO integrals
    >>> vj = get_jk(mol, dm, 'ijkl,ji->kl', aosym='s8')
    >>> # Exchange matrix with 8-fold permutation symmetry for AO integrals
    >>> vk = get_jk(mol, dm, 'ijkl,jk->il', aosym='s8')
    >>> # Compute coulomb and exchange matrices together
    >>> vj, vk = get_jk(mol, (dm,dm), ('ijkl,ji->kl','ijkl,li->kj'), aosym='s8')
    >>> # Analytical gradients for coulomb matrix
    >>> j1 = get_jk(mol, dm, 'ijkl,lk->ij', intor='int2e_ip1_sph', aosym='s2kl', comp=3)

    >>> # contraction across two molecules
    >>> mol1 = gto.M(atom='He 2 0 0', basis='6-31g')
    >>> nao1 = mol1.nao_nr()
    >>> dm1 = numpy.random.random((nao1,nao1))
    >>> # Coulomb interaction between two molecules, note 4-fold symmetry can be applied
    >>> jcross = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', aosym='s4')
    >>> ecoul = numpy.einsum('ij,ij', jcross, dm1)
    >>> # Exchange interaction between two molecules, no symmetry can be used
    >>> kcross = get_jk((mol1,mol,mol,mol1), dm, scripts='ijkl,jk->il')
    >>> ex = numpy.einsum('ij,ji', kcross, dm1)

    >>> # Analytical gradients for coulomb matrix between two molecules
    >>> jcros1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3)
    >>> # Analytical gradients for coulomb interaction between 1s density and the other molecule
    >>> jpart1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3,
    ...                 shls_slice=(0,1,0,1,0,mol.nbas,0,mol.nbas))
    '''
    if isinstance(mols, (tuple, list)):
        intor, comp = gto.moleintor._get_intor_and_comp(
            mols[0]._add_suffix(intor), comp)
        assert (len(mols) == 4)
        assert (mols[0].cart == mols[1].cart == mols[2].cart == mols[3].cart)
        if shls_slice is None:
            shls_slice = numpy.array([(0, mol.nbas) for mol in mols])
        else:
            # numpy.array (not asarray) forces a private copy: the offsets
            # below are added in place and must not leak into the caller's
            # array.
            shls_slice = numpy.array(shls_slice).reshape(4, 2)

        # concatenate unique mols and build corresponding shls_slice
        mol_ids = [id(mol) for mol in mols]
        atm, bas, env = mols[0]._atm, mols[0]._bas, mols[0]._env
        bas_start = numpy.zeros(4, dtype=int)
        for m in range(1, 4):
            first = mol_ids.index(mol_ids[m])
            if first == m:  # the unique mol, not repeated in mols
                bas_start[m] = bas.shape[0]
                atm, bas, env = gto.conc_env(atm, bas, env, mols[m]._atm,
                                             mols[m]._bas, mols[m]._env)
            else:
                bas_start[m] = bas_start[first]
            shls_slice[m] += bas_start[m]
        shls_slice = shls_slice.flatten()
    else:
        intor, comp = gto.moleintor._get_intor_and_comp(
            mols._add_suffix(intor), comp)
        atm, bas, env = mols._atm, mols._bas, mols._env
        if shls_slice is None:
            shls_slice = (0, mols.nbas) * 4

    single_script = isinstance(scripts, str)
    if single_script:
        scripts = [scripts]

    # Check if letters other than ijkl were provided.  Every script is
    # inspected (a slice such as scripts[:4] would only look at the first
    # four scripts of the list).
    allowed_chars = 'ijkl,->as12'
    if set(''.join(scripts)).difference(allowed_chars):
        # Translate these letters to ijkl if possible
        scripts = [
            script.translate({
                ord(script[0]): 'i',
                ord(script[1]): 'j',
                ord(script[2]): 'k',
                ord(script[3]): 'l'
            }) for script in scripts
        ]
        if set(''.join(scripts)).difference(allowed_chars):
            raise RuntimeError('Scripts unsupported %s' % scripts)

    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = [dms]
    assert (len(scripts) == len(dms))

    # Format scripts: keep an explicit symmetry prefix on the output indices,
    # otherwise choose s1/s2 from hermi.
    descript = []
    for script in scripts:
        dmsym, vsym = script.lower().split(',')[1].split('->')
        if vsym[:2] in ('a2', 's2', 's1'):
            descript.append(dmsym + '->' + vsym)
        elif hermi == 0:
            descript.append(dmsym + '->s1' + vsym)
        else:
            descript.append(dmsym + '->s2' + vsym)

    vs = _vhf.direct_bindm(intor, aosym, descript, dms, comp, atm, bas, env,
                           vhfopt=vhfopt, shls_slice=shls_slice)
    if hermi != 0:
        for v in vs:
            if v.ndim == 3:
                for vi in v:
                    lib.hermi_triu(vi, hermi, inplace=True)
            else:
                lib.hermi_triu(v, hermi, inplace=True)

    if single_script:
        vs = vs[0]
    return vs
def pspace(fci, h1e, eri, norb, nelec, transformer, hdiag_det=None, hdiag_csf=None, npsp=200):
    '''Build a P-space Hamiltonian block in the CSF basis.

    Note that getting pspace for npsp CSFs is substantially more costly than
    getting it for npsp determinants, until code exists that can evaluate
    Hamiltonian matrix elements of CSFs directly. On the other hand a pspace
    of determinants contains many redundant degrees of freedom for the same
    reason. Therefore the default pspace size has been reduced by a factor
    of 2.

    The routine selects the npsp lowest-diagonal CSFs (restricted to
    ``transformer.wfnsym`` when that is not None), collects every determinant
    belonging to the configurations those CSFs come from, builds the
    Hamiltonian over those determinants, transforms it with
    ``transformer.mat_det2csf_confspace`` and finally truncates the result
    back to at most npsp CSFs.

    Args:
        fci: solver object; must provide ``make_hdiag``, ``make_hdiag_csf``
            and ``verbose`` (it is also the target of the logger calls).
        h1e: one-electron Hamiltonian (made contiguous here).
        eri: two-electron integrals, passed through ``ao2mo.restore(1, ...)``.
        norb: number of orbitals; must not exceed 63.
        nelec: electron count, unpacked by ``_unpack_nelec``.
        transformer: object providing ``wfnsym``, ``confsym``,
            ``econf_csf_mask``, ``econf_det_mask`` and
            ``mat_det2csf_confspace``.
        hdiag_det: optional precomputed determinant-basis diagonal.
        hdiag_csf: optional precomputed CSF-basis diagonal.
        npsp: requested number of CSFs in the P-space.

    Returns:
        (csf_addr, h0): addresses of the selected CSFs and the matching
        Hamiltonian block.

    Raises:
        NotImplementedError: if norb > 63.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    t0 = (time.process_time (), time.time ())
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = np.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag_det is None:
        hdiag_det = fci.make_hdiag(h1e, eri, norb, nelec)
    if hdiag_csf is None:
        hdiag_csf = fci.make_hdiag_csf(h1e, eri, norb, nelec, hdiag_det=hdiag_det)
    # The ``np.int`` alias was removed from NumPy (1.24); the builtin ``int``
    # selects the same default integer dtype on every NumPy version.
    csf_addr = np.arange (hdiag_csf.size, dtype=int)
    if transformer.wfnsym is None:
        ncsf_sym = hdiag_csf.size
    else:
        idx_sym = transformer.confsym[transformer.econf_csf_mask] == transformer.wfnsym
        ncsf_sym = np.count_nonzero (idx_sym)
        csf_addr = csf_addr[idx_sym]
    if ncsf_sym > npsp:
        try:
            csf_addr = csf_addr[np.argpartition(hdiag_csf[csf_addr], npsp-1)[:npsp]]
        except AttributeError:
            # Fallback for NumPy builds without argpartition.
            csf_addr = csf_addr[np.argsort(hdiag_csf[csf_addr])[:npsp]]

    # Collect the configurations hit by the selected CSFs and every
    # determinant that belongs to one of those configurations.
    econf_addr = np.unique (transformer.econf_csf_mask[csf_addr])
    det_addr = np.concatenate ([np.nonzero (transformer.econf_det_mask == conf)[0]
                                for conf in econf_addr])
    lib.logger.debug (fci, ("csf.pspace: Lowest-energy %s CSFs correspond to %s configurations"
                            " which are spanned by %s determinants"),
                      npsp, econf_addr.size, det_addr.size)

    addra, addrb = divmod(det_addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    npsp_det = len(det_addr)
    h0 = np.zeros((npsp_det,npsp_det))
    h1e_ab = unpack_h1e_ab (h1e)
    h1e_a = np.ascontiguousarray(h1e_ab[0])
    h1e_b = np.ascontiguousarray(h1e_ab[1])
    g2e = ao2mo.restore(1, eri, norb)
    # The same integral array is used for all three spin blocks.
    g2e_ab = g2e_bb = g2e_aa = g2e
    _debug_g2e (fci, g2e, eri, norb) # Exploring g2e nan bug; remove later?
    t0 = lib.logger.timer (fci, "csf.pspace: index manipulation", *t0)
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(npsp_det))
    t0 = lib.logger.timer (fci, "csf.pspace: pspace Hamiltonian in determinant basis", *t0)

    # The diagonal is taken from hdiag_det; hermi_triu then completes the
    # other triangle of the matrix.
    for i in range(npsp_det):
        h0[i,i] = hdiag_det[det_addr[i]]
    h0 = lib.hermi_triu(h0)

    # Reference eigenvalues are only needed for the debug report below.
    try:
        if fci.verbose >= lib.logger.DEBUG:
            evals_before = scipy.linalg.eigh (h0)[0]
    except ValueError:
        lib.logger.debug (fci, ("ERROR: h0 has {} infs, {} nans; h1e_a has {} infs, {} nans; "
            "h1e_b has {} infs, {} nans; g2e has {} infs, {} nans, norb = {}, npsp_det = {}").format (
            np.count_nonzero (np.isinf (h0)), np.count_nonzero (np.isnan (h0)),
            np.count_nonzero (np.isinf (h1e_a)), np.count_nonzero (np.isnan (h1e_a)),
            np.count_nonzero (np.isinf (h1e_b)), np.count_nonzero (np.isnan (h1e_b)),
            np.count_nonzero (np.isinf (g2e)), np.count_nonzero (np.isnan (g2e)),
            norb, npsp_det))
        evals_before = np.zeros (npsp_det)

    h0, csf_addr = transformer.mat_det2csf_confspace (h0, econf_addr)
    t0 = lib.logger.timer (fci, "csf.pspace: transform pspace Hamiltonian into CSF basis", *t0)

    if fci.verbose >= lib.logger.DEBUG:
        lib.logger.debug2 (fci, "csf.pspace: eigenvalues of h0 before transformation %s", evals_before)
        evals_after = scipy.linalg.eigh (h0)[0]
        lib.logger.debug2 (fci, "csf.pspace: eigenvalues of h0 after transformation %s", evals_after)
        # Pair each transformed eigenvalue with its closest original one.
        idx = [np.argmin (np.abs (evals_before - ev)) for ev in evals_after]
        resid = evals_after - evals_before[idx]
        lib.logger.debug2 (fci, "csf.pspace: best h0 eigenvalue matching differences after transformation: %s", resid)
        lib.logger.debug (fci, "csf.pspace: if the transformation of h0 worked the following number will be zero: %s", np.max (np.abs(resid)))

    # We got extra CSFs from building the configurations most of the time.
    if csf_addr.size > npsp:
        try:
            csf_addr_2 = np.argpartition(np.diag (h0), npsp-1)[:npsp]
        except AttributeError:
            csf_addr_2 = np.argsort(np.diag (h0))[:npsp]
        csf_addr = csf_addr[csf_addr_2]
        h0 = h0[np.ix_(csf_addr_2,csf_addr_2)]
    npsp_csf = csf_addr.size
    lib.logger.debug (fci, "csf_solver.pspace: asked for %s-CSF pspace; found %s CSFs", npsp, npsp_csf)

    t0 = lib.logger.timer (fci, "csf.pspace wrapup", *t0)
    return csf_addr, h0
def test_direct_jk_s2(self):
    """Compare the direct J/K kernels against in-core and einsum references."""
    numpy.random.seed(15)
    dm1 = numpy.random.random((nao,nao))
    dm1 = dm1 + dm1.T

    def same(expected, actual):
        self.assertTrue(numpy.allclose(expected, actual))

    # 8-fold kernels versus the in-core builder.
    j_ref, k_ref = scf._vhf.incore(rhf._eri, dm1, 1)
    j_out, k_out = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs8',
                           'CVHFnrs8_ji_s2kl', 'CVHFnrs8_jk_s2il')
    same(j_ref, j_out)
    same(k_ref, k_out)

    # From here on the references are explicit contractions of the full
    # four-index integral array.
    eri1 = ao2mo.restore(1, rhf._eri, nao)
    j_ref = numpy.einsum('ijkl,kl->ij', eri1, dm1)
    k_ref = numpy.einsum('ijkl,jk->il', eri1, dm1)

    # 4-fold kernels: one J and one K contraction.
    first, second = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                            'CVHFnrs4_ji_s2kl', 'CVHFnrs4_jk_s2il')
    same(j_ref, first)
    same(k_ref, second)

    # 4-fold kernels: both contraction patterns must give K.
    first, second = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                            'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il')
    same(k_ref, first)
    same(k_ref, second)

    # 4-fold kernels: four K contractions in a single call.
    outs = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                   'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il',
                   'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il')
    for pos in range(4):
        same(k_ref, outs[pos])

    # Lower-symmetry kernel families: two J then two K contractions each.
    kernel_sets = (
        ('CVHFdot_nrs2kl', 'CVHFnrs2kl_ji_s2kl', 'CVHFnrs2kl_lk_s2ij',
         'CVHFnrs2kl_jk_s2il', 'CVHFnrs2kl_li_s2kj'),
        ('CVHFdot_nrs2ij', 'CVHFnrs2ij_ji_s2kl', 'CVHFnrs2ij_lk_s2ij',
         'CVHFnrs2ij_jk_s2il', 'CVHFnrs2ij_li_s2kj'),
        ('CVHFdot_nrs1', 'CVHFnrs1_ji_s2kl', 'CVHFnrs1_lk_s2ij',
         'CVHFnrs1_jk_s2il', 'CVHFnrs1_li_s2kj'),
    )
    for names in kernel_sets:
        outs = runjks2(dm1, 1, 'cint2e_sph', *names)
        same(j_ref, outs[0])
        same(j_ref, outs[1])
        same(k_ref, outs[2])
        same(k_ref, outs[3])

    # runjk leaves one triangle only; fill the other before comparing.
    j_ref, k_ref = scf._vhf.incore(rhf._eri, dm1, 1)
    j_out, k_out = runjk(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs8',
                         'CVHFnrs8_ji_s2kl', 'CVHFnrs8_jk_s2il')
    j_out = lib.hermi_triu(j_out, 1)
    k_out = lib.hermi_triu(k_out, 1)
    same(j_ref, j_out)
    same(k_ref, k_out)
def incore(eri, dms, hermi=0, with_j=True, with_k=True):
    """Contract in-memory two-electron integrals with density matrices.

    Args:
        eri: double-precision integrals, either a 2D array with
            npair*npair elements (4-fold symmetry) or a 1D array with
            npair*(npair+1)//2 elements (8-fold symmetry), where
            npair = nao*(nao+1)//2.
        dms: one density matrix or a stack of them; the last axis sets nao.
        hermi: symmetry flag forwarded to ``lib.hermi_triu`` for K; with
            8-fold integrals ``hermi == 1`` also selects the s2 K kernel.
        with_j, with_k: whether to build J and/or K.

    Returns:
        (vj, vk), each reshaped to the shape of ``dms`` or None when the
        matching ``with_*`` flag is false.

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    """
    assert (eri.dtype == numpy.double)
    eri = numpy.asarray(eri, order='C')
    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao = dms_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    vj = numpy.zeros((n_dm, nao, nao)) if with_j else None
    vk = numpy.zeros((n_dm, nao, nao)) if with_k else None

    # Parallel lists describing each contraction handed to the C driver:
    # input pointer, output pointer and kernel function.
    in_ptrs = []
    out_ptrs = []
    kernels = []

    def schedule(kernel, sources, targets):
        for src, dst in zip(sources, targets):
            in_ptrs.append(src.ctypes.data_as(ctypes.c_void_p))
            out_ptrs.append(dst.ctypes.data_as(ctypes.c_void_p))
            kernels.append(kernel)

    npair = nao * (nao + 1) // 2
    if eri.ndim == 2 and npair * npair == eri.size:
        # 4-fold symmetric integrals.
        driver = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        if with_j:
            # 'ijkl,kl->ij'
            schedule(_fpointer('CVHFics4_kl_s2ij'), dms, vj)
        if with_k:
            # 'ijkl,il->jk'
            schedule(_fpointer('CVHFics4_il_s1jk'), dms, vk)
    elif eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size:
        # 8-fold symmetric integrals.
        driver = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        if with_j:
            j_kernel = _fpointer('CVHFics8_tridm_vj')
            # J consumes the packed lower triangle of dm + dm^T with the
            # diagonal entries halved.
            packed = lib.pack_tril(lib.hermi_sum(dms, axes=(0, 2, 1)))
            diag = numpy.arange(nao)
            packed[:, diag * (diag + 1) // 2 + diag] *= .5
            schedule(j_kernel, packed, vj)
        if with_k:
            k_name = 'CVHFics8_jk_s2il' if hermi == 1 else 'CVHFics8_jk_s1il'
            schedule(_fpointer(k_name), dms, vk)
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dms_shape, eri.shape))

    n_ops = len(in_ptrs)
    ptr_array = ctypes.c_void_p * n_ops
    driver(eri.ctypes.data_as(ctypes.c_void_p),
           ptr_array(*in_ptrs),
           ptr_array(*out_ptrs),
           ctypes.c_int(n_ops), ctypes.c_int(nao),
           ptr_array(*kernels))

    if with_j:
        # J is always completed as a hermitian matrix.
        for mat in vj:
            lib.hermi_triu(mat, 1, inplace=True)
        vj = vj.reshape(dms_shape)
    if with_k:
        if hermi != 0:
            for mat in vk:
                lib.hermi_triu(mat, hermi, inplace=True)
        vk = vk.reshape(dms_shape)
    return vj, vk
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False):
    """Build J and K matrices with the integral-direct C driver.

    Args:
        dms: one 2D density matrix or a sequence of them.
        atm, bas, env: integral environment arrays; converted to C-ordered
            int32 / int32 / double arrays before use.
        vhfopt: optional optimizer object; when given it supplies the
            integral name and the optimizer handles, otherwise
            'int2e_cart' / 'int2e_sph' is chosen from ``cart``.
        hermi: ``1`` selects the s2 K kernel; any non-zero value is also
            forwarded to ``lib.hermi_triu`` for K.
        cart: use the Cartesian integral when ``vhfopt`` is None.

    Returns:
        Array of shape (2, n_dm, nao, nao) holding J then K, squeezed to
        (2, nao, nao) when a single density matrix was given.
    """
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    # Promote a single matrix to a stack of one.
    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = dms[numpy.newaxis,:,:]
    n_dm = len(dms)
    nao = dms[0].shape[0]
    dms = numpy.asarray(dms, order='C')

    if vhfopt is not None:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    else:
        intor = 'int2e_cart' if cart else 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    cintor = _fpointer(intor)

    driver = getattr(libcvhf, 'CVHFnr_direct_drv')
    fdot = _fpointer('CVHFdot_nrs8')
    j_kernel = _fpointer('CVHFnrs8_ji_s2kl')
    k_kernel = _fpointer('CVHFnrs8_li_s2kj' if hermi == 1 else 'CVHFnrs8_li_s1kj')

    # Slots [0, n_dm) are the J contractions and [n_dm, 2*n_dm) the K
    # contractions, each reading the same density matrices.
    n_ops = 2 * n_dm
    vjk = numpy.empty((2,n_dm,nao,nao))
    fjk = (ctypes.c_void_p*n_ops)()
    dmsptr = (ctypes.c_void_p*n_ops)()
    vjkptr = (ctypes.c_void_p*n_ops)()
    for slot in range(n_ops):
        which, idm = divmod(slot, n_dm)
        dmsptr[slot] = dms[idm].ctypes.data_as(ctypes.c_void_p)
        vjkptr[slot] = vjk[which,idm].ctypes.data_as(ctypes.c_void_p)
        fjk[slot] = k_kernel if which else j_kernel

    shls_slice = (ctypes.c_int*8)(*([0, c_bas.shape[0]]*4))
    ao_loc = make_loc(bas, intor)

    driver(cintor, fdot, fjk, dmsptr, vjkptr,
           ctypes.c_int(n_dm*2), ctypes.c_int(1),
           shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p),
           cintopt, cvhfopt,
           c_atm.ctypes.data_as(ctypes.c_void_p), natm,
           c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
           c_env.ctypes.data_as(ctypes.c_void_p))

    # J is always symmetric; K is only completed when a symmetry is declared.
    for idm in range(n_dm):
        vjk[0,idm] = lib.hermi_triu(vjk[0,idm], 1)
    if hermi != 0:
        for idm in range(n_dm):
            vjk[1,idm] = lib.hermi_triu(vjk[1,idm], hermi)
    if n_dm == 1:
        vjk = vjk.reshape(2,nao,nao)
    return vjk
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    '''Assemble, for every atom in atmlst, the sum ``h1ao + veff`` of a
    one-electron derivative term and a J/K plus XC derivative term.

    Args:
        mf: mean-field object; this function reads ``mf.mol``, ``mf._numint``,
            ``mf.xc``, ``mf.grids``, ``mf.stdout`` and ``mf.verbose``.
        mo_coeff: orbital coefficients with shape (nao, nmo).
        mo_occ: occupations; orbitals with ``mo_occ > 0`` build the density.
        chkfile: if given, each per-atom result is saved there under the key
            ``'scf_h1ao/%d' % ia`` instead of being collected in memory.
        atmlst: atom indices to process; defaults to all atoms of the molecule.
        verbose: a ``logger.Logger`` instance or a verbosity level.

    Returns:
        The list of per-atom arrays when chkfile is None, otherwise chkfile.

    Raises:
        NotImplementedError: if the functional type is neither 'LDA' nor 'GGA'.
    '''
    # NOTE(review): ``log`` is created here but never used in this function.
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    mocc = mo_coeff[:,mo_occ>0]
    # Density matrix from the occupied orbitals; the factor 2 presumably
    # accounts for double occupancy -- TODO confirm against callers.
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching the XC backend below does not
    # modify mf._numint itself.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to libxc when xcfun cannot handle this functional.
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    # Hard-coded value handed to ni.block_loop as its max_memory argument.
    max_memory = 4000

    # Negated sum of the 'ipkin' and 'ipnuc' one-electron integrals (3 components).
    h1a =-(mol.intor('cint1e_ipkin_sph', comp=3) +
           mol.intor('cint1e_ipnuc_sph', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for i0, ia in enumerate(atmlst):
        # Shell range [shl0, shl1) and AO range [p0, p1) of atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]

        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        # Symmetrize over the last two axes.
        h1ao = h1ao + h1ao.transpose(0,2,1)

        # Restrict the first index of the two-electron integrals to the
        # shells of atom ia; the other three run over all shells.
        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        if abs(hyb) > 1e-10:
            # Hybrid functional: both J-type and K-type contractions are needed.
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            # NOTE(review): the return value is discarded, so this relies on
            # hermi_triu updating its argument in place -- confirm its default.
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb*.5*vk1
            veff[:,p0:p1] += vj2 - hyb*.5*vk2
        else:
            # Pure functional: only the two J-type contractions.
            vj1, vj2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            # NOTE(review): same in-place assumption as in the hybrid branch.
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            # veff aliases vj1 here, so the updates below also modify vj1.
            veff = vj1
            veff[:,p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                # Contract the AO values with the density columns of atom ia,
                # then with the AO derivative components restricted to that atom.
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:,:,p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight*frr*rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:,:,p0:p1], weight*vrho)
                aow[:,:,p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                # Drop references to the per-block temporaries.
                half = aow = aow1 = None

        elif xctype == 'GGA':
            def get_wv(rho, rho1, weight, vxc, fxc):
                # Combine the XC derivatives with the density response rho1
                # into four weighted rows, one per row of rho.
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4,ngrid))
                wv[0]  = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:]  = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # XX, XY, ..., ZZ are module-level names not shown here;
                # presumably row indices of the second-derivative AO
                # components -- TODO confirm.
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1,XX,XY,XZ],:,p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2,YX,YY,YZ],:,p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3,ZX,ZY,ZZ],:,p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:,p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[2], dm0[:,p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[3], dm0[:,p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)

                # Row 0 is halved before contraction; transpose_sum below
                # then adds the matrix to its transpose.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # Terms built from the first derivatives of the XC energy
                # (vrho, vgamma); they only touch the rows of atom ia.
                wv = numpy.empty_like(rho)
                wv[0]  = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0,p0:p1] -= lib.dot(ao[1,:,p0:p1].T.copy(), aow)
                veff[1,p0:p1] -= lib.dot(ao[2,:,p0:p1].T.copy(), aow)
                veff[2,p0:p1] -= lib.dot(ao[3,:,p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX,XY,XZ],:,p0:p1], wv[1:4])
                veff[0,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX,YY,YZ],:,p0:p1], wv[1:4])
                veff[1,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX,ZY,ZZ],:,p0:p1], wv[1:4])
                veff[2,p0:p1] -= lib.dot(aow.T, ao[0])

        else:
            # Any functional type other than 'LDA' or 'GGA' ends up here.
            raise NotImplementedError('meta-GGA')

        # Symmetrize over the last two axes, as was done for h1ao.
        veff = veff + veff.transpose(0,2,1)

        if chkfile is None:
            h1aos.append(h1ao+veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao+veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile