def test_transpose_sum(self):
    """Compare lib.hermi_sum / lib.transpose_sum with explicit numpy sums."""
    axes = (0, 2, 1)

    def check(expected, actual):
        # The largest element-wise deviation must vanish to 12 places.
        self.assertAlmostEqual(abs(expected - actual).max(), 0, 12)

    # Real-valued stack of square matrices.
    a = numpy.random.random((3, 400, 400))
    check(a[0] + a[0].T, lib.hermi_sum(a[0]))
    check(a + a.transpose(axes), lib.hermi_sum(a, axes))
    # The reference (first argument) is evaluated before the in-place call
    # gets a chance to overwrite ``a``.
    check(a + a.transpose(axes), lib.hermi_sum(a, axes, inplace=True))

    # Complex-valued stack: the default sum uses the conjugate transpose,
    # while hermi=3 is compared against the plain transpose.
    a = numpy.random.random((3, 400, 400)) + numpy.random.random(
        (3, 400, 400)) * 1j
    check(a[0] + a[0].T.conj(), lib.hermi_sum(a[0]))
    check(a + a.transpose(axes).conj(), lib.hermi_sum(a, axes))
    check(a + a.transpose(axes), lib.hermi_sum(a, axes, hermi=3))
    check(a + a.transpose(axes).conj(),
          lib.hermi_sum(a, axes, inplace=True))

    # Single real matrix through transpose_sum.
    a = numpy.random.random((400, 400))
    b = a + a.T.conj()
    c = lib.transpose_sum(a)
    check(b, c)

    # Integer input: the result must keep the int32 dtype.
    a = (a * 1000).astype(numpy.int32)
    b = a + a.T
    c = lib.transpose_sum(a)
    check(b, c)
    self.assertTrue(c.dtype == numpy.int32)
def _check_(c):
    """Symmetrize ``c`` in place, verify its norm is 1, and return it normalized.

    ``c`` is overwritten with ``lib.transpose_sum(c, inplace=True) * .5``.

    Raises:
        ValueError: if the norm of the symmetrized array deviates from 1 by
            more than 1e-6 (reported as 'State not singlet').
    """
    sym = lib.transpose_sum(c, inplace=True)
    sym *= .5
    norm = numpy.linalg.norm(sym)
    deviation = abs(norm - 1)
    if deviation > 1e-6:
        raise ValueError('State not singlet %g' % deviation)
    return sym / norm
def part_eri_hermi(eri, norb, nimp):
    """Zero the rows from ``nimp`` on in each unpacked pair block, then symmetrize.

    The integrals are restored to 4-fold packed form. Every packed row is
    unpacked to a square matrix, rows ``nimp:`` are set to zero, and the
    matrix plus its transpose is packed back. The packed array is then added
    to its transpose, restored to 8-fold form and scaled by 0.25.

    NOTE(review): ``ao2mo.restore(4, ...)`` may hand back the caller's array,
    in which case the in-place edits below would be visible to the caller --
    confirm.
    """
    eri4 = ao2mo.restore(4, eri, norb)
    for row, packed in enumerate(eri4):
        square = lib.unpack_tril(packed)
        square[nimp:] = 0
        eri4[row] = lib.pack_tril(square + square.T)
    eri4 = lib.transpose_sum(eri4, inplace=True)
    return ao2mo.restore(8, eri4, norb) * 0.25
def contract_2e(eri, fcivec, norb, nelec, link_index=None):
    """Contract the 2-electron operator with a CI vector via FCIcontract_2e_spin0.

    The integrals are restored to 4-fold form and symmetrized in place as
    ``(eri + eri.T) * .5`` before being passed to the C kernel.

    Returns:
        The contracted vector, summed with its transpose and reshaped to
        ``fcivec.shape``.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    fcivec = numpy.asarray(fcivec, order='C')
    eri = ao2mo.restore(4, eri, norb)
    lib.transpose_sum(eri, inplace=True)
    eri *= .5
    link_index = _unpack(norb, nelec, link_index)
    nstr, nlink = link_index.shape[:2]
    assert(fcivec.size == nstr**2)
    sigma = numpy.empty((nstr, nstr))

    libfci.FCIcontract_2e_spin0(_ptr(eri), _ptr(fcivec), _ptr(sigma),
                                ctypes.c_int(norb), ctypes.c_int(nstr),
                                ctypes.c_int(nlink), _ptr(link_index))
    # No factor of .5 here: FCIcontract_2e_spin0 only computes half of the
    # contraction, and the transpose sum supplies the other half.
    return lib.transpose_sum(sigma, inplace=True).reshape(fcivec.shape)
def contract_2e(eri, fcivec, norb, nelec, link_index=None):
    """Apply the 2-electron operator to ``fcivec`` using FCIcontract_2e_spin0.

    ``eri`` is restored to 4-fold form and replaced in place by the average
    of itself and its transpose. The C kernel fills a fresh (na, na) buffer,
    which is summed with its transpose and returned with the shape of
    ``fcivec``.
    """
    fcivec = numpy.asarray(fcivec, order='C')
    eri = ao2mo.restore(4, eri, norb)
    lib.transpose_sum(eri, inplace=True)
    eri *= .5
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert (fcivec.size == na**2)
    ci1 = numpy.empty((na, na))

    void_p = ctypes.c_void_p
    eri_p = eri.ctypes.data_as(void_p)
    civec_p = fcivec.ctypes.data_as(void_p)
    ci1_p = ci1.ctypes.data_as(void_p)
    link_p = link_index.ctypes.data_as(void_p)
    libfci.FCIcontract_2e_spin0(eri_p, civec_p, ci1_p,
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), link_p)
    # The result is not halved: per the kernel's contract it only computes
    # half of the contraction.
    return lib.transpose_sum(ci1, inplace=True).reshape(fcivec.shape)
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Subtract ``rdm1`` from the ``[:, k, k, :]`` slices of ``rdm2`` and symmetrize.

    Args:
        rdm1: square matrix; its leading dimension sets ``nmo``.
        rdm2: 4-index array reshaped below to (nmo*nmo, nmo*nmo).
        inplace: when False (default) ``rdm2`` is copied first; when True the
            caller's array is modified.

    Returns:
        ``(rdm1, rdm2)`` where ``rdm2`` is the averaged array with shape
        (nmo, nmo, nmo, nmo).

    NOTE(review): this variant subtracts ``rdm1`` while other variants of
    this routine subtract ``rdm1.T``; the two differ only for a
    non-symmetric ``rdm1`` -- confirm which convention applies here.
    """
    nmo = rdm1.shape[0]
    work = rdm2 if inplace else rdm2.copy()
    for k in range(nmo):
        work[:, k, k, :] -= rdm1
    flat = work.reshape(nmo * nmo, -1)
    averaged = lib.transpose_sum(flat, inplace=True) * .5
    return rdm1, averaged.reshape(nmo, nmo, nmo, nmo)
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Subtract ``rdm1.T`` from the ``[:, k, k, :]`` slices of ``rdm2`` and symmetrize.

    Unless ``inplace`` is true, ``rdm2`` is copied before modification. The
    corrected array is viewed as a (nmo*nmo, nmo*nmo) matrix, averaged with
    its transpose and returned with shape (nmo, nmo, nmo, nmo) together with
    the untouched ``rdm1``.
    """
    nmo = rdm1.shape[0]
    if inplace:
        dm2 = rdm2
    else:
        dm2 = rdm2.copy()
    dm1_t = rdm1.T
    for k in range(nmo):
        dm2[:, k, k, :] -= dm1_t
    dm2 = lib.transpose_sum(dm2.reshape(nmo*nmo, -1), inplace=True) * .5
    return rdm1, dm2.reshape(nmo, nmo, nmo, nmo)
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Remove the ``rdm1.T`` contribution from ``rdm2`` and average over particles.

    Args:
        rdm1: square matrix of dimension ``nmo``.
        rdm2: 4-index array; copied first unless ``inplace`` is true.
        inplace: modify the caller's ``rdm2`` instead of a copy.

    Returns:
        ``(rdm1, rdm2)`` with ``rdm2`` of shape (nmo, nmo, nmo, nmo).
    """
    nmo = rdm1.shape[0]
    shape4 = (nmo, nmo, nmo, nmo)
    if not inplace:
        rdm2 = rdm2.copy()
    transposed = rdm1.T
    for k in range(nmo):
        rdm2[:, k, k, :] -= transposed
    # Employing the particle permutation symmetry, average over the two
    # particles to reduce numerical round-off error.
    pair_matrix = rdm2.reshape(nmo * nmo, -1)
    rdm2 = lib.transpose_sum(pair_matrix, inplace=True) * .5
    return rdm1, rdm2.reshape(*shape4)
def test_transpose_sum(self):
    """Check lib.hermi_sum and lib.transpose_sum against numpy references."""
    perm = (0, 2, 1)

    # --- real input -------------------------------------------------------
    a = numpy.random.random((3,400,400))
    expected = a[0] + a[0].T
    got = lib.hermi_sum(a[0])
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    expected = a + a.transpose(perm)
    got = lib.hermi_sum(a, perm)
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    # Build the reference first: the in-place call may overwrite ``a``.
    expected = a + a.transpose(perm)
    got = lib.hermi_sum(a, perm, inplace=True)
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    # --- complex input ----------------------------------------------------
    a = numpy.random.random((3,400,400)) + numpy.random.random((3,400,400)) * 1j
    expected = a[0] + a[0].T.conj()
    got = lib.hermi_sum(a[0])
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    expected = a + a.transpose(perm).conj()
    got = lib.hermi_sum(a, perm)
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    # hermi=3 is compared against the plain (non-conjugated) transpose.
    expected = a + a.transpose(perm)
    got = lib.hermi_sum(a, perm, hermi=3)
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    expected = a + a.transpose(perm).conj()
    got = lib.hermi_sum(a, perm, inplace=True)
    self.assertAlmostEqual(abs(expected - got).max(), 0, 12)

    # --- transpose_sum on a single matrix ----------------------------------
    a = numpy.random.random((400,400))
    b = a + a.T.conj()
    c = lib.transpose_sum(a)
    self.assertAlmostEqual(abs(b - c).max(), 0, 12)

    # Integer input must come back as int32.
    a = (a*1000).astype(numpy.int32)
    b = a + a.T
    c = lib.transpose_sum(a)
    self.assertAlmostEqual(abs(b - c).max(), 0, 12)
    self.assertTrue(c.dtype == numpy.int32)
def contract_2e(eri, fcivec, norb, nelec, link_index=None, orbsym=None, wfnsym=0):
    """Contract the 2-electron operator with a CI vector, blocked by irrep.

    When ``orbsym`` is None the call is forwarded unchanged to
    ``direct_spin0.contract_2e``. Otherwise the integrals and the string
    link tables are reordered per irrep, the CI vector is split into
    ``TOTIRREPS`` sub-blocks, the C kernel ``FCIcontract_2e_symm1`` is run on
    those blocks, and the results are scattered back into a full (na, na)
    array.

    Returns:
        The contracted vector summed with its transpose, reshaped to the
        original shape of ``fcivec``.
    """
    if orbsym is None:
        return direct_spin0.contract_2e(eri, fcivec, norb, nelec, link_index)

    eri = ao2mo.restore(4, eri, norb)
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    # Only equal numbers of alpha and beta electrons are handled here.
    assert (neleca == nelecb)
    link_indexa = direct_spin0._unpack(norb, nelec, link_index)
    na, nlinka = link_indexa.shape[:2]
    eri_irs, rank_eri, irrep_eri = direct_spin1_symm.reorder_eri(
        eri, norb, orbsym)

    strsa = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    aidx, link_indexa = direct_spin1_symm.gen_str_irrep(
        strsa, orbsym, link_indexa, rank_eri, irrep_eri)

    # Fixed-length C arrays (one slot per irrep) holding raw data pointers.
    Tirrep = ctypes.c_void_p * TOTIRREPS
    linka_ptr = Tirrep(
        *[x.ctypes.data_as(ctypes.c_void_p) for x in link_indexa])
    eri_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in eri_irs])
    dimirrep = (ctypes.c_int * TOTIRREPS)(*[x.shape[0] for x in eri_irs])
    fcivec_shape = fcivec.shape
    fcivec = fcivec.reshape((na, na), order='C')
    ci1new = numpy.zeros_like(fcivec)
    nas = (ctypes.c_int * TOTIRREPS)(*[x.size for x in aidx])

    # Gather the input block for each irrep pair (ir, wfnsym ^ ir) and
    # allocate a zeroed output block of the same shape.
    ci0 = []
    ci1 = []
    for ir in range(TOTIRREPS):
        ma, mb = aidx[ir].size, aidx[wfnsym ^ ir].size
        ci0.append(numpy.zeros((ma, mb)))
        ci1.append(numpy.zeros((ma, mb)))
        if ma > 0 and mb > 0:
            lib.take_2d(fcivec, aidx[ir], aidx[wfnsym ^ ir], out=ci0[ir])
    ci0_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci0])
    ci1_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci1])
    # The alpha tables are passed for both spin slots (``nas``, ``nlinka``
    # and ``linka_ptr`` each appear twice).
    libfci.FCIcontract_2e_symm1(eri_ptrs, ci0_ptrs, ci1_ptrs,
                                ctypes.c_int(norb), nas, nas,
                                ctypes.c_int(nlinka), ctypes.c_int(nlinka),
                                linka_ptr, linka_ptr, dimirrep,
                                ctypes.c_int(wfnsym))
    # Scatter the non-empty result blocks back into the full array.
    for ir in range(TOTIRREPS):
        if ci0[ir].size > 0:
            lib.takebak_2d(ci1new, ci1[ir], aidx[ir], aidx[wfnsym ^ ir])
    return lib.transpose_sum(ci1new, inplace=True).reshape(fcivec_shape)
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    """Contract the 1-electron operator with a CI vector via FCIcontract_1e_spin0.

    ``f1e`` is packed to lower-triangular form before being handed to the C
    kernel.

    Returns:
        The contracted vector, summed with its transpose and reshaped to
        ``fcivec.shape``.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    fcivec = numpy.asarray(fcivec, order='C')
    link_index = _unpack(norb, nelec, link_index)
    nstr, nlink = link_index.shape[:2]
    assert(fcivec.size == nstr**2)
    sigma = numpy.empty_like(fcivec)
    f1e_tril = lib.pack_tril(f1e)
    libfci.FCIcontract_1e_spin0(_ptr(f1e_tril), _ptr(fcivec), _ptr(sigma),
                                ctypes.c_int(norb), ctypes.c_int(nstr),
                                ctypes.c_int(nlink), _ptr(link_index))
    # No factor of .5: the spin0 kernel called above only computes half of
    # the contraction, and the transpose sum supplies the rest.
    return lib.transpose_sum(sigma, inplace=True).reshape(fcivec.shape)
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    """Apply the 1-electron operator to ``fcivec`` using FCIcontract_1e_spin0.

    The kernel writes into a buffer shaped like ``fcivec``; that buffer is
    summed with its transpose and returned with the shape of ``fcivec``.
    """
    fcivec = numpy.asarray(fcivec, order='C')
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert (fcivec.size == na**2)
    ci1 = numpy.empty_like(fcivec)
    f1e_tril = lib.pack_tril(f1e)

    void_p = ctypes.c_void_p
    f1e_p = f1e_tril.ctypes.data_as(void_p)
    civec_p = fcivec.ctypes.data_as(void_p)
    ci1_p = ci1.ctypes.data_as(void_p)
    link_p = link_index.ctypes.data_as(void_p)
    libfci.FCIcontract_1e_spin0(f1e_p, civec_p, ci1_p,
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), link_p)
    # The result is not halved because the spin0 kernel above only computes
    # half of the contraction.
    return lib.transpose_sum(ci1, inplace=True).reshape(fcivec.shape)
def contract_2e(eri, civec_strs, norb, nelec, link_index=None, orbsym=None):
    """Contract the 2-electron operator with a selected-CI vector (with symmetry).

    Two C kernels accumulate into the same output buffer: the (aa|aa) part
    (only when ``nelec[0] > 1``) and the (bb|aa) part. The buffer is then
    summed with its transpose and wrapped by ``selected_ci._as_SCIvector``.

    Args:
        eri: 2-electron integrals, restored to full (s1) form below.
        civec_strs: CI coefficients plus strings, unpacked by
            ``selected_ci._unpack``.
        norb: number of orbitals.
        nelec: electron count(s), also normalized by ``selected_ci._unpack``.
        link_index: optional 4-tuple of link tables; built when None.
        orbsym: orbital symmetry labels forwarded to
            ``selected_ci_symm.reorder4irrep``.
    """
    ci_coeff, nelec, ci_strs = selected_ci._unpack(civec_strs, nelec)
    if link_index is None:
        link_index = selected_ci._all_linkstr_index(ci_strs, norb, nelec)
    # Only the alpha tables (cd_indexa, dd_indexa) are used below.
    cd_indexa, dd_indexa, cd_indexb, dd_indexb = link_index
    na, nlinka = nb, nlinkb = cd_indexa.shape[:2]

    eri = ao2mo.restore(1, eri, norb)
    # Antisymmetrized integrals restricted to index pairs taken from the
    # strict lower triangle (tril_indices with k=-1).
    eri1 = eri.transpose(0, 2, 1, 3) - eri.transpose(0, 2, 3, 1)
    idx, idy = numpy.tril_indices(norb, -1)
    idx = idx * norb + idy
    eri1 = lib.take_2d(eri1.reshape(norb**2, -1), idx, idx) * 2
    # Average with the transpose: (eri1 + eri1.T) * .5
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, dd_indexa, dimirrep = selected_ci_symm.reorder4irrep(
        eri1, norb, dd_indexa, orbsym, -1)
    fcivec = ci_coeff.reshape(na, nb)
    ci1 = numpy.zeros_like(fcivec)
    # (aa|aa)
    if nelec[0] > 1:
        ma, mlinka = mb, mlinkb = dd_indexa.shape[:2]
        libfci.SCIcontract_2e_aaaa_symm(
            eri1.ctypes.data_as(ctypes.c_void_p),
            fcivec.ctypes.data_as(ctypes.c_void_p),
            ci1.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(norb), ctypes.c_int(na), ctypes.c_int(nb),
            ctypes.c_int(ma), ctypes.c_int(mlinka),
            dd_indexa.ctypes.data_as(ctypes.c_void_p),
            dimirrep.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(len(dimirrep)))

    # Add h_ps (scaled by .5/nelec[0]) onto the [:, :, k, k] and
    # [k, k, :, :] slices of a copy of the integrals.
    h_ps = numpy.einsum('pqqs->ps', eri) * (.5 / nelec[0])
    eri1 = eri.copy()
    for k in range(norb):
        eri1[:, :, k, k] += h_ps
        eri1[k, k, :, :] += h_ps
    eri1 = ao2mo.restore(4, eri1, norb)
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, cd_indexa, dimirrep = selected_ci_symm.reorder4irrep(
        eri1, norb, cd_indexa, orbsym)
    # (bb|aa)
    # The alpha link table is passed for both link-table arguments.
    libfci.SCIcontract_2e_bbaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                    fcivec.ctypes.data_as(ctypes.c_void_p),
                                    ci1.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(norb),
                                    ctypes.c_int(na), ctypes.c_int(nb),
                                    ctypes.c_int(nlinka), ctypes.c_int(nlinkb),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    dimirrep.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(len(dimirrep)))

    lib.transpose_sum(ci1, inplace=True)
    return selected_ci._as_SCIvector(ci1.reshape(ci_coeff.shape), ci_strs)
def incore(eri, dm, hermi=0):
    """Build the J and K matrices from in-memory integrals and a density matrix.

    The layout of ``eri`` is detected from its rank and size: a 2D array
    with ``npair*npair`` elements is treated as 4-fold symmetric, a 1D array
    with ``npair*(npair+1)//2`` elements as 8-fold symmetric.

    Args:
        eri: real integrals (asserted not complex).
        dm: density matrix; ``dm.shape[0]`` defines ``nao``.
        hermi: passed to ``lib.hermi_triu`` for both outputs when non-zero;
            when 0 only ``vj`` is completed (with hermi=1).

    Returns:
        ``(vj, vk)``.

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    """
    assert (not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao, nao))
    vk = numpy.empty((nao, nao))
    npair = nao * (nao + 1) // 2

    is_s4 = eri.ndim == 2 and npair * npair == eri.size
    is_s8 = eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size
    if not (is_s4 or is_s8):
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dm.shape, eri.shape))

    if is_s4:
        # 4-fold symmetry eri
        fdrv = libcvhf.CVHFnrs4_incore_drv
        # vj: 'ijkl,kl->ij'; vk: 'ijkl,il->jk'.
        # (Alternative kernels: CVHFics4_ij_s2kl for 'ijkl,ij->kl' and
        # CVHFics4_jk_s1il for 'ijkl,jk->il'.)
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    else:
        # 8-fold symmetry eri
        fdrv = libcvhf.CVHFnrs8_incore_drv
        fvj = _fpointer('CVHFics8_tridm_vj')
        fvk = _fpointer('CVHFics8_jk_s2il' if hermi == 1
                        else 'CVHFics8_jk_s1il')
        # Pack dm + dm^T, then halve the diagonal entries of the packed
        # array (located at i*(i+1)//2 + i).
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        diag = numpy.arange(nao)
        tridm[diag * (diag + 1) // 2 + diag] *= .5

    void_p = ctypes.c_void_p
    fdrv(eri.ctypes.data_as(void_p), tridm.ctypes.data_as(void_p),
         vj.ctypes.data_as(void_p), dm.ctypes.data_as(void_p),
         vk.ctypes.data_as(void_p), ctypes.c_int(nao), fvj, fvk)

    if hermi == 0:
        vj = lib.hermi_triu(vj, 1)
    else:
        vj = lib.hermi_triu(vj, hermi)
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
def incore(eri, dm, hermi=0):
    """Compute J and K matrices from in-core integrals and a density matrix.

    A 2D ``eri`` with ``npair*npair`` elements is dispatched to the 4-fold
    driver; a 1D ``eri`` with ``npair*(npair+1)//2`` elements goes to the
    8-fold driver. Any other size raises RuntimeError.

    Returns:
        ``(vj, vk)``. ``vj`` is always passed through ``lib.hermi_triu``;
        ``vk`` only when ``hermi`` is non-zero.
    """
    assert(not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao,nao))
    vk = numpy.empty((nao,nao))
    npair = nao*(nao+1)//2

    if eri.ndim == 2 and npair*npair == eri.size:
        # 4-fold symmetry eri
        driver_name = 'CVHFnrs4_incore_drv'
        vj_kernel = 'CVHFics4_kl_s2ij'  # 'ijkl,kl->ij'
        vk_kernel = 'CVHFics4_il_s1jk'  # 'ijkl,il->jk'
        # Equivalent alternatives noted by the original author:
        #   'ijkl,ij->kl' via CVHFics4_ij_s2kl
        #   'ijkl,jk->il' via CVHFics4_jk_s1il
        fdrv = getattr(libcvhf, driver_name)
        fvj = _fpointer(vj_kernel)
        fvk = _fpointer(vk_kernel)
        tridm = dm
    elif eri.ndim == 1 and npair*(npair+1)//2 == eri.size:
        # 8-fold symmetry eri
        driver_name = 'CVHFnrs8_incore_drv'
        fdrv = getattr(libcvhf, driver_name)
        fvj = _fpointer('CVHFics8_tridm_vj')
        if hermi == 1:
            vk_kernel = 'CVHFics8_jk_s2il'
        else:
            vk_kernel = 'CVHFics8_jk_s1il'
        fvk = _fpointer(vk_kernel)
        summed = lib.transpose_sum(dm)
        tridm = lib.pack_tril(summed)
        # Halve the diagonal entries of the packed array.
        rows = numpy.arange(nao)
        tridm[rows*(rows+1)//2+rows] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dm.shape, eri.shape))

    eri_p = eri.ctypes.data_as(ctypes.c_void_p)
    tridm_p = tridm.ctypes.data_as(ctypes.c_void_p)
    vj_p = vj.ctypes.data_as(ctypes.c_void_p)
    dm_p = dm.ctypes.data_as(ctypes.c_void_p)
    vk_p = vk.ctypes.data_as(ctypes.c_void_p)
    fdrv(eri_p, tridm_p, vj_p, dm_p, vk_p, ctypes.c_int(nao), fvj, fvk)

    vj = lib.hermi_triu(vj, hermi if hermi != 0 else 1)
    if hermi != 0:
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
def contract_2e(eri, civec_strs, norb, nelec, link_index=None, orbsym=None):
    """Contract the 2-electron operator with a selected-CI vector (with symmetry).

    The (aa|aa) kernel (run only when ``nelec[0] > 1``) and the (bb|aa)
    kernel both accumulate into one output buffer, which is then summed with
    its transpose and wrapped by ``selected_ci._as_SCIvector``.
    """
    ci_coeff, nelec, ci_strs = selected_ci._unpack(civec_strs, nelec)
    if link_index is None:
        link_index = selected_ci._all_linkstr_index(ci_strs, norb, nelec)
    # Only the alpha tables (cd_indexa, dd_indexa) are used below.
    cd_indexa, dd_indexa, cd_indexb, dd_indexb = link_index
    na, nlinka = nb, nlinkb = cd_indexa.shape[:2]

    eri = ao2mo.restore(1, eri, norb)
    # Antisymmetrized integrals restricted to index pairs from the strict
    # lower triangle (tril_indices with k=-1).
    eri1 = eri.transpose(0,2,1,3) - eri.transpose(0,2,3,1)
    idx,idy = numpy.tril_indices(norb, -1)
    idx = idx * norb + idy
    eri1 = lib.take_2d(eri1.reshape(norb**2,-1), idx, idx) * 2
    # Average with the transpose: (eri1 + eri1.T) * .5
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, dd_indexa, dimirrep = selected_ci_symm.reorder4irrep(eri1, norb, dd_indexa, orbsym, -1)
    fcivec = ci_coeff.reshape(na,nb)
    ci1 = numpy.zeros_like(fcivec)
    # (aa|aa)
    if nelec[0] > 1:
        ma, mlinka = mb, mlinkb = dd_indexa.shape[:2]
        libfci.SCIcontract_2e_aaaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                        fcivec.ctypes.data_as(ctypes.c_void_p),
                                        ci1.ctypes.data_as(ctypes.c_void_p),
                                        ctypes.c_int(norb),
                                        ctypes.c_int(na), ctypes.c_int(nb),
                                        ctypes.c_int(ma), ctypes.c_int(mlinka),
                                        dd_indexa.ctypes.data_as(ctypes.c_void_p),
                                        dimirrep.ctypes.data_as(ctypes.c_void_p),
                                        ctypes.c_int(len(dimirrep)))

    # Add h_ps (scaled by .5/nelec[0]) onto the [:,:,k,k] and [k,k,:,:]
    # slices of a copy of the integrals.
    h_ps = numpy.einsum('pqqs->ps', eri) * (.5/nelec[0])
    eri1 = eri.copy()
    for k in range(norb):
        eri1[:,:,k,k] += h_ps
        eri1[k,k,:,:] += h_ps
    eri1 = ao2mo.restore(4, eri1, norb)
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, cd_indexa, dimirrep = selected_ci_symm.reorder4irrep(eri1, norb, cd_indexa, orbsym)
    # (bb|aa)
    # The alpha link table is passed for both link-table arguments.
    libfci.SCIcontract_2e_bbaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                    fcivec.ctypes.data_as(ctypes.c_void_p),
                                    ci1.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(norb),
                                    ctypes.c_int(na), ctypes.c_int(nb),
                                    ctypes.c_int(nlinka), ctypes.c_int(nlinkb),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    dimirrep.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(len(dimirrep)))

    lib.transpose_sum(ci1, inplace=True)
    return selected_ci._as_SCIvector(ci1.reshape(ci_coeff.shape), ci_strs)
def contract_2e(eri, fcivec, norb, nelec, link_index=None, orbsym=None, wfnsym=0):
    """Contract the 2-electron operator with a CI vector, blocked by irrep.

    When ``orbsym`` is None the call is forwarded unchanged to
    ``direct_spin0.contract_2e``. Otherwise the number of irreps is taken
    from the reordered integrals (``totirrep = len(eri_irs)``), the CI
    vector is split into per-irrep blocks, ``FCIcontract_2e_symm1`` is run
    on them, and the results are scattered back into a full (na, na) array.

    Returns:
        The contracted vector summed with its transpose, reshaped to the
        original shape of ``fcivec``.
    """
    if orbsym is None:
        return direct_spin0.contract_2e(eri, fcivec, norb, nelec, link_index)

    eri = ao2mo.restore(4, eri, norb)
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    # Only equal numbers of alpha and beta electrons are handled here.
    assert(neleca == nelecb)
    link_indexa = direct_spin0._unpack(norb, nelec, link_index)
    na, nlinka = link_indexa.shape[:2]
    eri_irs, rank_eri, irrep_eri = direct_spin1_symm.reorder_eri(eri, norb, orbsym)
    totirrep = len(eri_irs)

    strsa = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    aidx, link_indexa = direct_spin1_symm.gen_str_irrep(strsa, orbsym, link_indexa,
                                                        rank_eri, irrep_eri, totirrep)

    # C arrays (one slot per irrep) holding raw data pointers.
    Tirrep = ctypes.c_void_p*totirrep
    linka_ptr = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in link_indexa])
    eri_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in eri_irs])
    dimirrep = (ctypes.c_int*totirrep)(*[x.shape[0] for x in eri_irs])
    fcivec_shape = fcivec.shape
    fcivec = fcivec.reshape((na,na), order='C')
    ci1new = numpy.zeros_like(fcivec)
    # NOTE(review): this array has a hard-coded length of 8 while every
    # other per-irrep table here is sized by ``totirrep``; ctypes rejects
    # more initializers than slots, so this presumably assumes
    # len(aidx) <= 8 -- confirm against the C kernel's expectations.
    nas = (ctypes.c_int*8)(*[x.size for x in aidx])

    # Gather the input block for each irrep pair (ir, wfnsym^ir) and
    # allocate a zeroed output block of the same shape.
    ci0 = []
    ci1 = []
    for ir in range(totirrep):
        ma, mb = aidx[ir].size, aidx[wfnsym^ir].size
        ci0.append(numpy.zeros((ma,mb)))
        ci1.append(numpy.zeros((ma,mb)))
        if ma > 0 and mb > 0:
            lib.take_2d(fcivec, aidx[ir], aidx[wfnsym^ir], out=ci0[ir])
    ci0_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci0])
    ci1_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci1])
    # The alpha tables are passed for both spin slots (``nas``, ``nlinka``
    # and ``linka_ptr`` each appear twice).
    libfci.FCIcontract_2e_symm1(eri_ptrs, ci0_ptrs, ci1_ptrs,
                                ctypes.c_int(norb), nas, nas,
                                ctypes.c_int(nlinka), ctypes.c_int(nlinka),
                                linka_ptr, linka_ptr, dimirrep,
                                ctypes.c_int(totirrep), ctypes.c_int(wfnsym))
    # Scatter the non-empty result blocks back into the full array.
    for ir in range(totirrep):
        if ci0[ir].size > 0:
            lib.takebak_2d(ci1new, ci1[ir], aidx[ir], aidx[wfnsym^ir])
    return lib.transpose_sum(ci1new, inplace=True).reshape(fcivec_shape)
def make_hdiag(h1e, eri, norb, nelec):
    """Return the flattened, symmetrized diagonal produced by FCImake_hdiag.

    Args:
        h1e: 1-electron matrix (made C-contiguous).
        eri: 2-electron integrals, restored to full (s1) form.
        norb: number of orbitals.
        nelec: total electron count (int, halved for the alpha count) or an
            ``(neleca, nelecb)`` pair whose entries must be equal.

    Returns:
        1D array of length ``na*na`` where ``na`` is the number of strings.
    """
    if not isinstance(nelec, (int, numpy.number)):
        neleca, nelecb = nelec
        assert (neleca == nelecb)
    else:
        neleca = nelec // 2

    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    strs = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    na = len(strs)
    hdiag = numpy.empty((na, na))
    # Diagonal slices eri[i,i,j,j] and eri[i,j,j,i] as C-ordered matrices.
    jdiag = numpy.asarray(numpy.einsum('iijj->ij', eri), order='C')
    kdiag = numpy.asarray(numpy.einsum('ijji->ij', eri), order='C')

    void_p = ctypes.c_void_p
    libfci.FCImake_hdiag(hdiag.ctypes.data_as(void_p),
                         h1e.ctypes.data_as(void_p),
                         jdiag.ctypes.data_as(void_p),
                         kdiag.ctypes.data_as(void_p),
                         ctypes.c_int(norb), ctypes.c_int(na),
                         ctypes.c_int(neleca),
                         strs.ctypes.data_as(void_p))
    # Symmetrize hdiag to reduce numerical error.
    symmetrized = lib.transpose_sum(hdiag, inplace=True) * .5
    return symmetrized.ravel()
def make_hdiag(h1e, eri, norb, nelec):
    """Compute the diagonal via FCImake_hdiag, symmetrize it, and flatten it.

    ``nelec`` may be a total electron count (the alpha count is
    ``nelec//2``) or an ``(neleca, nelecb)`` pair with equal entries.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    if isinstance(nelec, (int, numpy.number)):
        neleca = nelec//2
    else:
        neleca, nelecb = nelec
        assert(neleca == nelecb)

    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    orbitals = range(norb)
    strs = numpy.asarray(cistring.gen_strings4orblist(orbitals, neleca))
    na = len(strs)
    hdiag = numpy.empty((na,na))
    jdiag = numpy.asarray(numpy.einsum('iijj->ij',eri), order='C')
    kdiag = numpy.asarray(numpy.einsum('ijji->ij',eri), order='C')
    libfci.FCImake_hdiag(_ptr(hdiag), _ptr(h1e), _ptr(jdiag), _ptr(kdiag),
                         ctypes.c_int(norb), ctypes.c_int(na),
                         ctypes.c_int(neleca), _ptr(strs))
    # Averaging with the transpose reduces numerical error.
    hdiag = lib.transpose_sum(hdiag, inplace=True) * .5
    return hdiag.ravel()
def kernel_ms0(fci, h1e, eri, norb, nelec, ci0=None, link_index=None,
               tol=None, lindep=None, max_cycle=None, max_space=None,
               nroots=None, davidson_only=None, pspace_size=None,
               max_memory=None, verbose=None, ecore=0, **kwargs):
    """Solve for the lowest state(s) with the spin-0 solver ``fci``.

    Solver settings left as None fall back to the matching attribute on
    ``fci``. If the p-space spans the whole (na*na) space, no ``ci0`` is
    given and ``davidson_only`` is false, the p-space eigenvectors are
    returned directly when they pass ``_check_``. Otherwise ``fci.eig`` is
    run with the contracted Hamiltonian.

    Args:
        fci: solver object providing ``make_hdiag``, ``pspace``,
            ``make_precond``, ``absorb_h1e``, ``contract_2e``, ``eig`` and
            the default settings.
        h1e, eri: 1- and 2-electron integrals.
        norb, nelec: number of orbitals and electrons.
        ci0: optional initial guess: one array of size na*na or a sequence
            of such arrays.
        link_index: optional string link table, normalized by ``_unpack``.
        ecore: constant added to the returned energies.
        **kwargs: forwarded to ``fci.eig``.

    Returns:
        ``(e, c)``: energy plus ``ecore`` and the (na, na) CI array, or
        sequences of both when ``nroots > 1``.

    Raises:
        ValueError: from ``_check_`` if a converged vector's norm deviates
            from 1 after symmetrization.
    """
    if nroots is None: nroots = fci.nroots
    if davidson_only is None: davidson_only = fci.davidson_only
    if pspace_size is None: pspace_size = fci.pspace_size

    assert(fci.spin is None or fci.spin == 0)

    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)

    addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag, max(pspace_size,nroots))
    if pspace_size > 0:
        pw, pv = scipy.linalg.eigh(h0)
    else:
        pw = pv = None

    if pspace_size >= na*na and ci0 is None and not davidson_only:
        # The degenerated wfn can break symmetry.  The davidson iteration
        # with proper initial guess doesn't have this issue
        if na*na == 1:
            return pw[0]+ecore, pv[:,0].reshape(1,1)
        elif nroots > 1:
            civec = numpy.empty((nroots,na*na))
            civec[:,addr] = pv[:,:nroots].T
            civec = civec.reshape(nroots,na,na)
            try:
                return pw[:nroots]+ecore, [_check_(ci) for ci in civec]
            except ValueError:
                # Deliberate fall-through to the Davidson solver below.
                pass
        elif abs(pw[0]-pw[1]) > 1e-12:
            civec = numpy.empty((na*na))
            civec[addr] = pv[:,0]
            civec = civec.reshape(na,na)
            civec = lib.transpose_sum(civec) * .5
            # direct diagonalization may lead to triplet ground state
            ##TODO: optimize initial guess.  Using pspace vector as initial
            ## guess may have spin problems.  The 'ground state' of pspace
            ## vector may have different spin state to the true ground state.
            try:
                return pw[0]+ecore, _check_(civec.reshape(na,na))
            except ValueError:
                # Deliberate fall-through to the Davidson solver below.
                pass

    precond = fci.make_precond(hdiag, pw, pv, addr)

    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)
    def hop(c):
        hc = fci.contract_2e(h2e, c.reshape(na,na), norb, nelec, link_index)
        return hc.ravel()

    #TODO: check spin of initial guess
    if ci0 is None:
        if hasattr(fci, 'get_init_guess'):
            ci0 = fci.get_init_guess(norb, nelec, nroots, hdiag)
        else:
            ci0 = []
            for i in range(nroots):
                # The shape must be one tuple: numpy.zeros(na, na) would
                # treat the second ``na`` as the dtype and raise TypeError.
                x = numpy.zeros((na, na))
                if addr[i] == 0:
                    x[0,0] = 1
                else:
                    # Place equal weight on the (a, b) and (b, a) entries.
                    addra = addr[i] // na
                    addrb = addr[i] % na
                    x[addra,addrb] = x[addrb,addra] = numpy.sqrt(.5)
                ci0.append(x.ravel())
    else:
        if isinstance(ci0, numpy.ndarray) and ci0.size == na*na:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    if tol is None: tol = fci.conv_tol
    if lindep is None: lindep = fci.lindep
    if max_cycle is None: max_cycle = fci.max_cycle
    if max_space is None: max_space = fci.max_space
    if max_memory is None: max_memory = fci.max_memory
    if verbose is None: verbose = logger.Logger(fci.stdout, fci.verbose)
    #e, c = lib.davidson(hop, ci0, precond, tol=fci.conv_tol, lindep=fci.lindep)
    e, c = fci.eig(hop, ci0, precond, tol=tol, lindep=lindep,
                   max_cycle=max_cycle, max_space=max_space, nroots=nroots,
                   max_memory=max_memory, verbose=verbose, follow_state=True,
                   **kwargs)
    if nroots > 1:
        return e+ecore, [_check_(ci.reshape(na,na)) for ci in c]
    else:
        return e+ecore, _check_(c.reshape(na,na))
def get_eri(mydf, kpts=None, compact=True):
    """Assemble 2-electron integrals from ``mydf`` for a set of four k-points.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called when
            ``mydf._cderi`` is None.
        kpts: None (all four k-points zero), a single k-point of shape (3,)
            used for all four indices, or anything reshapeable to (4, 3).
        compact: only consulted in the gamma-point branch; when false the
            packed result is restored to full form.

    Returns:
        * all k-points ~ 0: a real array of shape (nao_pair, nao_pair), or
          (nao**2, nao**2) when ``compact`` is false;
        * kpti == kptl and kptj == kptk: a complex (nao**2, nao**2) array
          with the last two orbital indices swapped;
        * otherwise: a complex (nao**2, nao**2) array.
    """
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4,3))
    elif numpy.shape(kpts) == (3,):
        kptijkl = numpy.vstack([kpts]*4)
    else:
        kptijkl = numpy.reshape(kpts, (4,3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    auxcell = mydf.auxcell
    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    # Use 80% of the remaining memory budget, but never less than 2000.
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < 1e-9:
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eriR, 1)
        # Add the transposed product accumulated above in one step.
        eriR = lib.transpose_sum(eriR, inplace=True)

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8
        trilidx = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            # Keep only the lower-triangular orbital pairs.
            pqkR = numpy.asarray(pqkR.reshape(nao,nao,-1)[trilidx], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao,nao,-1)[trilidx], order='C')
            # Scale by sqrt(coulG) so the product of two factors carries one
            # full coulG weight.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.dot(pqkR, pqkR.T, 1, eriR, 1)
            lib.dot(pqkI, pqkI.T, 1, eriR, 1)
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < 1e-9) and (abs(kptj-kptk).sum() < 1e-9):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
            # Drop the references to the current block.
            LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
# rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
# rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        return (eriR.reshape((nao,)*4).transpose(0,1,3,2) +
                eriI.reshape((nao,)*4).transpose(0,1,3,2)*1j).reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        # Walk the (ij) and (kl) short-range loops in lock step.
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        # Two plane-wave loops run at once, so each gets a 40% share.
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .4

        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory),
                         mydf.pw_loop(cell, mydf.gs,-kptijkl[2:], max_memory=max_memory)):
            # Only the (pq) factor is scaled, so the full coulG is used here.
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
# rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
#                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
#                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
# rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
        return eriR + eriI*1j
def make_hdiag(h1e, eri, ci_strs, norb, nelec):
    """Return ``select_ci.make_hdiag`` averaged with its (na, na) transpose.

    ``na`` is the number of strings in ``ci_strs[0]``. The transpose sum is
    applied in place through a (na, na) reshape of the diagonal, which is
    then halved.
    """
    hdiag = select_ci.make_hdiag(h1e, eri, ci_strs, norb, nelec)
    nstr = len(ci_strs[0])
    square = hdiag.reshape(nstr, nstr)
    lib.transpose_sum(square, inplace=True)
    hdiag *= .5
    return hdiag
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Transform the 2-particle density-matrix blocks in ``d2`` to the AO basis.

    The transformation runs in three passes through temporary HDF5 storage.
    It is the out-of-core equivalent of the reference implementation::

        dm2 = ccsd_rdm._make_rdm2(mycc, None, d2, with_dm1=False)
        dm2 = numpy.einsum('pi,ijkl->pjkl', mo_coeff, dm2)
        dm2 = numpy.einsum('pj,ijkl->ipkl', mo_coeff, dm2)
        dm2 = numpy.einsum('pk,ijkl->ijpl', mo_coeff, dm2)
        dm2 = numpy.einsum('pl,ijkl->ijkp', mo_coeff, dm2)
        dm2 = dm2 + dm2.transpose(1,0,2,3)
        dm2 = dm2 + dm2.transpose(0,1,3,2)
        return ao2mo.restore(4, dm2*.5, nmo)

    Args:
        mycc: CC object supplying ``stdout``, ``verbose`` and ``max_memory``.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        mo_coeff: (nao, nmo) coefficient matrix (converted to Fortran order).
        fsave: optional HDF5-like file to receive the 'dm2' dataset.

    Returns:
        The (nao_pair, nao_pair) array read from the 'dm2' dataset when
        ``fsave`` is None; otherwise ``fsave`` itself.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; prefer time.process_time()
    # where it exists and fall back to time.clock() on older interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = cpu_clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = dovov.shape[:2]
    mo_coeff = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    nvir_pair = nvir * (nvir+1) //2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()
    def _trans(vin, orbs_slice, out=None):
        # Run the C driver on ``vin``, producing (nrow, nao_pair) output.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow,nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int*4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*1e6/8/(nao_pair+nmo**2))
    blksize = min(nvir_pair, max(ccsd.BLKMIN, blksize))
    chunks_vv = (int(min(blksize,4e8/blksize)), blksize)
    # Pass 1: transform the vvvv block, stored transposed in fswap['v'].
    fswap.create_dataset('v', (nao_pair,nvir_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        fswap['v'][:,p0:p1] = _trans(lib.unpack_tril(_cp(dvvvv[p0:p1])),
                                     (nocc,nmo,nocc,nmo)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory*1e6/8/(nao_pair+nmo**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    fswap.create_dataset('o', (nmo,nocc,nao_pair), 'f8', chunks=(nocc,nocc,blksize))
    buf1 = numpy.zeros((nocc,nocc,nmo,nmo))
    buf1[:,:,:nocc,:nocc] = doooo
    buf1[:,:,nocc:,nocc:] = _cp(doovv)
    buf1 = _trans(buf1.reshape(nocc**2,-1), (0,nmo,0,nmo))
    fswap['o'][:nocc] = buf1.reshape(nocc,nocc,nao_pair)
    dovoo = numpy.asarray(dooov).transpose(2,3,0,1)
    for p0, p1 in lib.prange(nocc, nmo, nocc):
        buf1 = numpy.zeros((nocc,p1-p0,nmo,nmo))
        buf1[:,:,:nocc,:nocc] = dovoo[:,p0-nocc:p1-nocc]
        buf1[:,:,nocc:,:nocc] = dovvo[:,p0-nocc:p1-nocc]
        buf1[:,:,:nocc,nocc:] = dovov[:,p0-nocc:p1-nocc]
        buf1[:,:,nocc:,nocc:] = dovvv[:,p0-nocc:p1-nocc]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocc,-1)
        buf1 = _trans(buf1, (0,nmo,0,nmo))
        fswap['o'][p0:p1] = buf1.reshape(p1-p0,nocc,nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    gsave = fsave.create_dataset('dm2', (nao_pair,nao_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmo,nmo))
        buf1[:,nocc:,nocc:] = lib.unpack_tril(_cp(fswap['v'][p0:p1]))
        buf1[:,:,:nocc] = fswap['o'][:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, (0,nmo,0,nmo))
        if p0 > 0:
            # Fold this row block into the column block written by earlier
            # iterations, and mirror the sum back into the current rows.
            buf1 = _cp(gsave[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            gsave[:p0,p0:p1] = buf1
        # Symmetrize the diagonal block in place.
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        gsave[p0:p1] = buf2
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # ``Dataset.value`` was removed in h5py 3.0; ``dataset[()]`` reads
        # the whole dataset on both old and new h5py versions.
        return fsave['dm2'][()]
    else:
        return fsave
def make_hdiag(h1e, eri, ci_strs, norb, nelec):
    """Return ``selected_ci.make_hdiag`` averaged with its (na, na) transpose.

    ``na`` is ``len(ci_strs[0])``. The diagonal is viewed as a (na, na)
    matrix, summed with its transpose in place, and then halved.
    """
    diag = selected_ci.make_hdiag(h1e, eri, ci_strs, norb, nelec)
    n_alpha_strs = len(ci_strs[0])
    as_matrix = diag.reshape(n_alpha_strs, n_alpha_strs)
    lib.transpose_sum(as_matrix, inplace=True)
    diag *= .5
    return diag
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density-matrix blocks with integrals into Ioo, Ivv, Ivo, Xvo.

    Args:
        mycc: object providing ``stdout`` and ``verbose``; also passed to
            ``ccsd._ERIS`` and the ``ccsd_rdm`` helpers when defaults are
            built.
        t1, t2, l1, l2: amplitudes; only ``t1.shape`` is read here directly,
            the rest are forwarded to the ``ccsd_rdm`` helpers.
        eris: integral container (Chemist's notation); built with
            ``ccsd._ERIS(mycc)`` when ``None``.
        d1: 4-tuple ``(doo, dov, dvo, dvv)``; built when ``None``.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; built when ``None``.

    Returns:
        ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  Ioo, Ivv
        and Ivo are negated before returning and Ivo is added into Xvo.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    # ---- oooo density block ----
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T, d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # ---- ooov density block ----
    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
    Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
    tmpooov = _cp(eris_ooov.transpose(0, 1, 3, 2))
    Ioo += lib.dot(tmpooov.reshape(-1, nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
    eris_oooo = tmp = None

    d_ooov = d_ooov + d_ooov.transpose(1, 0, 2, 3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc, -1), d_ooov.reshape(nocc, -1).T)
    Xvo += lib.dot(
        eris_ovov.reshape(-1, nvir).T,
        _cp(d_ooov.transpose(0, 2, 3, 1).reshape(nocc, -1)).T)
    d_ooov = None

    # ---- oovv density block ----
    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1, 0, 3, 2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                       d_oovv[i].reshape(nocc, -1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
    Ivo += lib.dot(d_oovv.reshape(-1, nvir).T, tmpooov.reshape(-1, nocc))
    d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(nocc, nocc, -1)
    eris_ooov = tmpooov = None

    # ---- ovov (+ ovvo) density block, assembled in slabs of `blksize` ----
    blksize = 4
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov, nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T, _cp(eris.ovoo).reshape(-1, nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))

    # ---- ovvv density block ----
    nvir_pair = nvir * (nvir + 1) // 2
    bufe_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    bufc_ovvv = numpy.empty((blksize, nvir, nvir_pair))
    # bufc_ovvv is re-pointed at bufe_ovvv's memory, so c_ovvv below must be
    # fully consumed before eris_ovvv is unpacked into the same buffer.
    # NOTE(review): assigning ndarray.data is deprecated in NumPy; confirm it
    # still works on the NumPy versions this file must support.
    bufc_ovvv.data = bufe_ovvv.data
    c_vvvo = numpy.empty((nvir_pair, nvir, nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            lib.dot(dovvv[p0 + i].reshape(nvir, -1),
                    eris_oovv[p0 + i].reshape(nocc, -1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1 - p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1, nvir, nvir), out=c_ovvv)
        # Blocked transpose of c_ovvv into c_vvvo (used by the vvvv loop).
        for i0, i1, in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE // (p1 - p0) + 1):
                c_vvvo[i0:i1, j0:j1, p0:p1] = c_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[p0 + i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)

        # From here on the shared buffer holds the unpacked integrals.
        eris_ovvv = bufe_ovvv[:p1 - p0]
        lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair),
                        out=eris_ovvv.reshape(-1, nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0, 1, 3, 2))
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        # Reuse d_ovvo's storage for the transposed ovov integrals.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0 + i])
            e_ovvo[i] = eris_ovov[p0 + i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(
            d_ovvv.reshape(-1, nvir).T, e_ovvo[:p1 - p0].reshape(-1, nocc),
            1, Ivo, 1)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir),
            1, Ivv, 1)

        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))
    d_oovv = d_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    # ---- vvvv density block, processed in slabs of packed (tril) rows ----
    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize * nvir, nvir, nocc))
    bufe_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    bufd_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    for p0, p1 in prange(0, nvir, blksize):
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Rows at packed index i*(i+1)//2+i (the diagonal pairs) get half
        # the weight of the other rows.
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv, out=bufd_vvvv[:off1 - off0])
        eris_vvvv = lib.unpack_tril(eris.vvvv[off0:off1],
                                    out=bufe_vvvv[:off1 - off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir),
            2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc),
            1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1 - off0]
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1, in prange(0, nvir, BLKSIZE // nocc + 1):
                eris_vvvo[i0 - off0:i1 - off0, j0:j1, :] = eris_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc),
            1, Ioo, 1)
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc),
            2, Ivo, 1)

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def get_eri(mydf, kpts=None, compact=True):
    """Assemble AO-basis two-electron integrals for one set of four k-points.

    Contributions from ``mydf.sr_loop`` and ``mydf.pw_loop`` are accumulated
    into the result.  The k-point pattern selects one of three branches:

    * all four k-points zero: real integrals in an ``(nao_pair, nao_pair)``
      array, expanded through ``ao2mo.restore(1, ...)`` when ``compact`` is
      false;
    * ``kpti == kptl`` and ``kptj == kptk``: complex ``(nao**2, nao**2)``;
    * anything else: complex ``(nao**2, nao**2)``.

    Args:
        mydf: object providing ``cell``, ``auxcell``, ``gs``, ``max_memory``,
            ``_cderi``, ``build()``, ``sr_loop()`` and ``pw_loop()``.
        kpts: ``None`` (four zero k-points), one k-point of shape ``(3,)``
            (used for all four indices), or data reshapeable to ``(4, 3)``.
        compact: only consulted in the all-zero k-point branch.

    Returns:
        2D array of integrals as described above.
    """
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4, 3))
    elif numpy.shape(kpts) == (3, ):
        kptijkl = numpy.vstack([kpts] * 4)
    else:
        kptijkl = numpy.reshape(kpts, (4, 3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    auxcell = mydf.auxcell
    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < 1e-9:
        eriR = numpy.zeros((nao_pair, nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eriR, 1)
            # Drop references to the block so its memory can be reclaimed.
            LpqR = LpqI = j3cR = j3cI = None
        # Only j3c^T * Lpq was accumulated; adding the transpose supplies
        # the other half.
        eriR = lib.transpose_sum(eriR, inplace=True)

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8
        trilidx = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            # Keep only the lower-triangular AO pairs.
            pqkR = numpy.asarray(pqkR.reshape(nao, nao, -1)[trilidx], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao, nao, -1)[trilidx], order='C')
            # sqrt(coulG) on each factor gives coulG in the product below.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.dot(pqkR, pqkR.T, 1, eriR, 1)
            lib.dot(pqkI, pqkI.T, 1, eriR, 1)
            pqkR = pqkI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2, -1)
        return eriR

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti - kptl).sum() < 1e-9) and (abs(kptj - kptk).sum() < 1e-9):
        eriR = numpy.zeros((nao * nao, nao * nao))
        eriI = numpy.zeros((nao * nao, nao * nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
            LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        return (eriR.reshape((nao, ) * 4).transpose(0, 1, 3, 2) + eriI.reshape(
            (nao, ) * 4).transpose(0, 1, 3, 2) * 1j).reshape(nao**2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        eriR = numpy.zeros((nao * nao, nao * nao))
        eriI = numpy.zeros((nao * nao, nao * nao))
        # The (ij) and (kl) k-point pairs are iterated in lock step.
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .4

        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory),
                         mydf.pw_loop(cell, mydf.gs,-kptijkl[2:], max_memory=max_memory)):
            # Only the (pq) factor is scaled here, by the full coulG.
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
            # rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
            #                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
            #                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
            # rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = rskR = rskI = None
        return eriR + eriI * 1j
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    """Build one ``h1ao + veff`` array per atom in ``atmlst``.

    For each atom the result combines derivative one-electron integrals
    (``int1e_ipkin``, ``int1e_ipnuc``, ``int1e_iprinv``), Coulomb/exchange
    terms from ``int2e_ip1`` and, depending on the functional type, LDA or
    GGA exchange-correlation terms evaluated on the grid.  Each array has
    three components (``comp=3``), presumably the x, y, z derivatives.

    Args:
        mf: mean-field object; ``mol``, ``_numint``, ``xc``, ``grids``,
            ``stdout`` and ``verbose`` are read from it.
        mo_coeff: 2D orbital coefficient array ``(nao, nmo)``.
        mo_occ: occupations; orbitals with ``mo_occ > 0`` form the density.
        chkfile: when given, each result is saved under the key
            ``'scf_h1ao/<ia>'`` instead of being collected in memory.
        atmlst: atom indices to process; defaults to all atoms of ``mol``.
        verbose: a ``logger.Logger`` is used as is; any other value is
            ignored and a logger is built from ``mf.verbose``.

    Returns:
        A list of per-atom arrays when ``chkfile`` is ``None``, otherwise
        ``chkfile``.

    Raises:
        NotImplementedError: for functional types other than LDA and GGA.
    """
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    mocc = mo_coeff[:, mo_occ > 0]
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so the XC backend switch below does not touch
    # mf._numint itself.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    max_memory = 4000

    h1a = -(mol.intor('int1e_ipkin', comp=3) +
            mol.intor('int1e_ipnuc', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for i0, ia in enumerate(atmlst):
        # Shell range and AO range (p0:p1) belonging to atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]

        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        int2e_ip1 = mol._add_suffix('int2e_ip1')
        if abs(hyb) > 1e-10:
            # Hybrid functional: Coulomb and exchange contributions.
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm(int2e_ip1, 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb * .5 * vk1
            veff[:, p0:p1] += vj2 - hyb * .5 * vk2
        else:
            # Pure functional: Coulomb contribution only.
            vj1, vj2 = \
                    _vhf.direct_bindm(int2e_ip1, 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1
            veff[:, p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:, :, p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight * frr * rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:, :, p0:p1], weight * vrho)
                aow[:, :, p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                half = aow = aow1 = None

        elif xctype == 'GGA':
            def get_wv(rho, rho1, weight, vxc, fxc):
                # Combine the first-order density rho1 with the XC
                # derivatives into a weighted (4, ngrid) array.
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4, ngrid))
                wv[0] = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:] = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1, XX, XY, XZ], :, p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2, YX, YY, YZ], :, p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3, ZX, ZY, ZZ], :, p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:, p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[2], dm0[:, p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[3], dm0[:, p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)

                # wv[0] is halved because transpose_sum below adds the
                # product to its own transpose.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                wv = numpy.empty_like(rho)
                wv[0] = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0, p0:p1] -= lib.dot(ao[1, :, p0:p1].T.copy(), aow)
                veff[1, p0:p1] -= lib.dot(ao[2, :, p0:p1].T.copy(), aow)
                veff[2, p0:p1] -= lib.dot(ao[3, :, p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX, XY, XZ], :, p0:p1], wv[1:4])
                veff[0, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX, YY, YZ], :, p0:p1], wv[1:4])
                veff[1, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX, ZY, ZZ], :, p0:p1], wv[1:4])
                veff[2, p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        veff = veff + veff.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao + veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile
def general(mydf, mo_coeffs, kpts=None, compact=True):
    """Add the ``sr_loop`` contributions to MO-basis two-electron integrals.

    The starting integrals come from ``pwdf_ao2mo.general``.  Contributions
    built from the blocks yielded by ``mydf.sr_loop`` are then accumulated
    on top.  The k-point pattern selects one of three branches:

    * all k-points zero (within ``KPT_DIFF_TOL``) and all orbitals real;
    * ``kpti == kptl`` and ``kptj == kptk``;
    * anything else.

    Args:
        mydf: object providing ``cell``, ``max_memory``, ``_cderi``,
            ``build()`` and ``sr_loop()``.
        mo_coeffs: four orbital coefficient arrays, or a single 2D ndarray
            that is used for all four indices.
        kpts: k-points, normalised by ``_format_kpts``.
        compact: forwarded to ``pwdf_ao2mo.general`` and, in the first
            branch, to ``_conc_mos``.

    Returns:
        The accumulated integral array ``eri_mo``.
    """
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    eri_mo = pwdf_ao2mo.general(mydf, mo_coeffs, kptijkl, compact)

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        if sym:
            eri_mo *= .5  # because we'll do +cc later

        ijR = klR = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               j3cR, klR, klmosym, mokl, klslice, False)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            if not sym:
                # Without ij/kl symmetry the swapped product must be
                # accumulated explicitly.
                ijR, klR = _dtrans(j3cR, ijR, ijmosym, moij, ijslice,
                                   LpqR, klR, klmosym, mokl, klslice, False)
                lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = j3cR = j3cI = None
        if sym:
            eri_mo = lib.transpose_sum(eri_mo, inplace=True)
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex was only an alias of the builtin complex and was
        # removed in NumPy 1.24; complex128 is the same dtype.
        eri_lk = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            bufL = LpqR+LpqI*1j
            bufj = j3cR+j3cI*1j
            zij, zlk = _ztrans(bufL, zij, moij, ijslice,
                               bufj, zlk, molk, lkslice, False)
            lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            if not sym:
                zij, zlk = _ztrans(bufj, zij, moij, ijslice,
                                   bufL, zlk, molk, lkslice, False)
                lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            LpqR = LpqI = j3cR = j3cI = bufL = bufj = None
        if sym:
            eri_lk += lib.transpose(eri_lk).conj()

        # eri_lk is indexed (ij, l, k); swap the last two axes to (ij, k, l)
        # before adding it to eri_mo.
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_lk = lib.transpose(eri_lk.reshape(-1,nmol,nmok), axes=(0,2,1))
        eri_mo += eri_lk.reshape(nij_pair,nlk_pair)
        return eri_mo

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # Two sr_loop generators run side by side, so each gets half.
        max_memory *= .5

        zij = zkl = None
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               jrsR+jrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            zij, zkl = _ztrans(jpqR+jpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None
        return eri_mo
def update_amps(cc, t1, t2, eris, max_memory=2000):
    """Compute updated t1 and t2 amplitudes from the current ones.

    The occupied index is processed in slabs of ``blksize`` rows so that
    only part of ``eris.ovvv``, ``eris.ovov`` and ``eris.oovv`` is held in
    memory at once.  The ``#:`` comments give the einsum expression that
    the following ``lib.dot`` calls implement.

    Args:
        cc: object providing ``stdout``, ``verbose`` and ``add_wvvVV_``.
        t1: array of shape ``(nocc, nvir)``.
        t2: array of shape ``(nocc, nocc, nvir, nvir)``.
        eris: integral container; ``fock``, ``oooo``, ``ooov``, ``ovoo``,
            ``oovv``, ``ovov`` and ``ovvv`` are read here.
        max_memory: memory budget; the value reported by
            ``lib.current_memory()[0]`` is subtracted before the slab size
            is chosen.

    Returns:
        ``(t1new, t2new)`` with the shapes of ``t1`` and ``t2``.
    """
    # time.clock() was removed in Python 3.8.  Keep using it where it exists
    # (unchanged behaviour) and fall back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()

    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time0)

    unit = _memory_usage_inloop(nocc, nvir)*1e6/8
    max_memory = max_memory - lib.current_memory()[0]
    blksize = max(BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks',
               blksize, nocc, int((nocc+blksize-1)//blksize))
    for p0, p1 in prange(0, nocc, blksize):
        # ==== read eris.ovvv ====
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1-p0)*nvir,-1))
        eris_ovvv = eris_ovvv.reshape(p1-p0,nvir,nvir,nvir)

        fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: tmp = numpy.einsum('ijcd,kdcb->ijbk', tau, eris.ovvv)
        #: t2new += numpy.einsum('ka,ijbk->ijba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = _cp(eris_ovvv.transpose(2,1,0,3).reshape(nvir*nvir,-1))
        tmp = numpy.empty((nocc,nocc,p1-p0,nvir))
        taubuf = numpy.empty((blksize,nocc,nvir,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1-j0])
            lib.dot(tau.reshape(-1,nvir*nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
        tmp = _cp(tmp.transpose(0,1,3,2).reshape(-1,p1-p0))
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

        #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        #: t2new += woVoV.transpose()
        #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = _cp(eris.ovoo[p0:p1].transpose(2,0,1,3))
        wOVov = lib.dot(tmp.reshape(-1,nocc), t1, -1)
        tmp = None
        wOVov = wOVov.reshape(nocc,p1-p0,nvir,nvir)
        #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wOVov.reshape(nocc,-1), 1)
        t2new[p0:p1] += wOVov.transpose(1,0,2,3)

        eris_ooov = _cp(eris.ooov[p0:p1])
        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(_cp(eris_ooov.transpose(0,1,3,2).reshape(-1,nocc)), t1)
        woVoV = woVoV.reshape(p1-p0,nocc,nvir,nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir,-1), -1,
                    woVoV[i].reshape(nocc,-1), 1)

        #: theta = t2.transpose(0,1,3,2) * 2 - t2
        #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        theta = numpy.empty((p1-p0,nocc,nvir,nvir))
        for i in range(p1-p0):
            theta[i] = t2[p0+i].transpose(0,2,1) * 2
            theta[i] -= t2[p0+i]
            lib.dot(_cp(theta[i].transpose(0,2,1).reshape(nocc,-1)),
                    eris_ovvv[i].reshape(-1,nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

        # ==== read eris.ovov ====
        eris_ovov = _cp(eris.ovov[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(_cp(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1)),
                theta.reshape(-1,nvir), -1, t1new, 1)
        eris_ooov = None

        #: wOVov += eris.ovov.transpose(0,1,3,2)
        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        theta = _cp(theta.transpose(0,3,1,2))
        wOVov = _cp(wOVov.transpose(0,3,1,2))
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov)).reshape(nocc,nvir,-1,nvir)
        eris_OvoV = _cp(eris_OVov.transpose(0,3,2,1))
        wOVov += eris_OVov
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = t2[j0:j1].transpose(0,2,1,3).copy()
            #: wOVov[j0:j1] -= .5 * numpy.einsum('iack,jkbc->jbai', eris_ovov, t2)
            lib.dot(t2iajb.reshape(-1,nov), eris_OvoV.reshape(nov,-1),
                    -.5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            # Reuse the t2iajb buffer to build tau in place.
            tau, t2iajb = t2iajb, None
            for i in range(j1-j0):
                tau[i] *= 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.dot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                    .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The same buffer now serves as the output of the product.
            tmp, tau = tau, None
            lib.dot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta.reshape(-1,nov),
                    1, tmp.reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
            tmp = None
            #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wOVov = eris_OvoV = eris_OVov = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1)
            tau += t2[p0+i]
            theta = tau.transpose(0,2,1) * 2
            theta -= tau
            lib.dot(_cp(eris_ovov[i].transpose(1,2,0)).reshape(nocc,-1),
                    theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.dot(theta.reshape(-1,nvir).T,
                    eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = None

        # ==== read eris.oovv ====
        eris_oovv = _cp(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiab', -t1, tmp)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            tmp = lib.dot(t1, eris_oovv[j].reshape(-1,nvir).T)
            tmp = _cp(tmp.reshape(nocc,nocc,nvir).transpose(0,2,1))
            t2new[p0+j] += lib.dot(tmp.reshape(-1,nocc), t1,
                                   -1).reshape(nocc,nvir,nvir).transpose(0,2,1)
            lib.dot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            lib.dot(tmp.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp = None

        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: woVoV -= eris.oovv
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
        #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1,3,0,2).copy()
        eris_oVOv = _cp(eris_ovov.transpose(0,3,2,1))
        eris_oOvV = _cp(eris_ovov.transpose(0,2,1,3))
        #==== mem usage blksize*(nocc*nvir**2*4)

        taubuf = numpy.empty((blksize,nocc,nvir,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1-j0])
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            lib.numpy_helper._dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                                    eris_oOvV.reshape(-1,nvir*nvir),
                                    tau.reshape(-1,nvir*nvir),
                                    woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1,
                                    0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.dot(_cp(tau.transpose(0,3,1,2).reshape(-1,nov)),
                    eris_oVOv.reshape(-1,nov).T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=taubuf[:p1-p0])
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir), .5,
                t2new.reshape(nocc*nocc,-1), 1)
        eris_oovv = eris_ovov = eris_oVOv = eris_oOvV = taubuf = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = _cp(t2[p0:p1].transpose(0,2,1,3))
        t2ibja = _cp(t2[p0:p1].transpose(0,3,1,2))
        tmp = numpy.empty((blksize,nvir,nocc,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                    t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                    t2iajb.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        t2ibja = t2iajb = woVoV = tmp = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    # ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the i >= j pairs are stored, in packed (tril) order.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij] = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, max_memory)
    time1 = log.timer_debug1('vvvv', *time1)

    # Divide by the orbital-energy denominators eia[i] + eia[j].
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    p0 = 0
    for i in range(nocc):
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Expand the packed result back to the full (nocc,nocc,nvir,nvir) array.
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

    #** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new /= eia
    #** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
def update_amps(mycc, t1, t2, eris):
    """Build the next t1/t2 amplitudes from the current ones and the integrals.

    Args:
        mycc: Solver object.  This function reads ``stdout``, ``verbose``,
            ``max_memory`` and ``direct`` from it and calls
            ``mycc.add_wvvVV_`` for the vvvv contribution.
        t1: Current singles amplitudes; ``t1.shape`` is taken as
            ``(nocc, nvir)``.
        t2: Current doubles amplitudes.  Indexed below as
            ``t2[occ, occ, vir, vir]`` -- presumably shape
            ``(nocc, nocc, nvir, nvir)``; confirm against the caller.
        eris: Integral container.  The attributes read here are ``fock``,
            ``oooo``, ``ooov``, ``ovoo``, ``oovv``, ``ovov`` and ``ovvv``
            (the latter is unpacked with ``lib.unpack_tril``).

    Returns:
        ``(t1new, t2new)``.  Both are reshaped views into one flat buffer
        (``t1t2new``) allocated at the top of the function.
    """
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm the
    # interpreter range this file is expected to run on.
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock

    # t1new and t2new share one contiguous allocation.
    t1t2new = numpy.zeros((nov+nov**2))
    t1new = t1t2new[:nov].reshape(t1.shape)
    t2new = t1t2new[nov:].reshape(t2.shape)
    # The vvvv term is accumulated for the lower triangle of the occupied
    # pair index only, then scattered back into the full t2new.
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    idxo = numpy.tril_indices(nocc)
    lib.takebak_2d(t2new.reshape(nocc**2,nvir**2),
                   t2new_tril.reshape(-1,nvir**2),
                   idxo[0]*nocc+idxo[1], numpy.arange(nvir**2))
    # Halve the i == j blocks: the final "t2new[i,j] += t2new[j,i].T" step
    # at the bottom of this function adds each diagonal block to its own
    # transpose.
    idxo = numpy.arange(nocc)
    t2new[idxo,idxo] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    # foo / fvv start from the Fock blocks with their diagonals zeroed.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc**2,nocc**2), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,nocc**2)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    # Choose the occupied block size (blksize) and the virtual block size
    # used when streaming eris.ovvv (blknvir) from the memory left over.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2

    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        # Load the *next* virtual block of eris.ovvv into `prefetch`;
        # nothing is read when the current block is already the last one.
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]

    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]

    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    # Five scratch buffers are carved out once and re-used (via
    # numpy.ndarray(..., buffer=...)) for every intermediate in the loop,
    # so the statement order below matters: a buffer must not be re-used
    # while an array living in it is still needed.
    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
        #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                # The block prefetched during the previous iteration
                # becomes the current one.
                readbuf, prefetchbuf = prefetchbuf, readbuf
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            # NOTE(review): no beta argument is passed here.  If lib.ddot
            # overwrites its output by default, the contributions of earlier
            # q-blocks are lost whenever nvir > blknvir -- please confirm.
            tmp = t1[:,q0:q1].copy()
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1))

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        # Wait for the last prefetch before its buffers are released.
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        # eris.oovv[p0:p1] is read in the background while the ooov term
        # below is contracted.
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        handler.join()
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            # tau's storage is recycled for the transposed integrals once
            # theta has been formed.
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = vov = None

        #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1), tmp.reshape(-1,nvir), -1, t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

        #: wOVov += eris.ovov
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i] = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov), out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1), out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir), axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T,
                     1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                 .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,nocc*nvir**2),-1, t2new.reshape(nocc,nocc*nvir**2), 1)

    # Divide by the orbital-energy denominators built from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Symmetrise and scale the j <= i blocks, then mirror them into (j, i).
    # `ij` is only incremented; its value is never read.
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i] = t2new[i,j].T
            ij += 1

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Transform spin-resolved 2-particle density intermediates from MO to AO.

    Args:
        mycc: Solver object; ``stdout``, ``verbose`` and ``max_memory`` are
            read from it.
        d2: Sequence of eight 4-tuples of density-matrix blocks, unpacked
            below in the order ovov, vvvv, oooo, oovv, ovvo, vvov, ovvv,
            ooov.  Each tuple holds four spin combinations (lower-case
            letters label one spin, upper-case the other).
        mo_coeff: Pair ``(mo_a, mo_b)`` of MO coefficient matrices with
            ``nao`` rows each.
        fsave: Optional HDF5-like file object that receives the result.
            When omitted a temporary file is created and the result is
            returned in memory instead.

    Returns:
        If ``fsave`` was None: a tuple of two ``(nao_pair, nao_pair)``
        arrays read back from the datasets ``'dm2aa+ab'`` and
        ``'dm2bb+ab'``.  Otherwise ``fsave`` itself, holding those two
        datasets.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm the
    # interpreter range this file is expected to run on.
    time1 = time.clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dovOV, dOVov, dOVOV = d2[0]
    dvvvv, dvvVV, dVVvv, dVVVV = d2[1]
    doooo, dooOO, dOOoo, dOOOO = d2[2]
    doovv, dooVV, dOOvv, dOOVV = d2[3]
    dovvo, dovVO, dOVvo, dOVVO = d2[4]
    dvvov, dvvOV, dVVov, dVVOV = d2[5]
    dovvv, dovVV, dOVvv, dOVVV = d2[6]
    dooov, dooOV, dOOov, dOOOV = d2[7]
    # The C driver below is handed raw pointers, so fix the memory layout.
    mo_a = numpy.asarray(mo_coeff[0], order='F')
    mo_b = numpy.asarray(mo_coeff[1], order='F')

    nocca, nvira, noccb, nvirb = dovOV.shape
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nao_pair = nao * (nao+1) // 2
    nvira_pair = nvira * (nvira+1) //2
    nvirb_pair = nvirb * (nvirb+1) //2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()

    def _trans(vin, mo_coeff, orbs_slice, out=None):
        # Run the C transformation on every row of `vin`, writing rows of
        # length nao_pair into `out` (allocated here when not supplied).
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow,nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int*4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # ---- pass 1: blocks whose leading pair index is virtual-virtual ----
    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize_a = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize_a = min(nvira_pair, max(ccsd.BLKMIN, blksize_a))
    chunks_a = (int(min(nao_pair, 4e8/blksize_a)), blksize_a)
    v_aa = fswap.create_dataset('v_aa', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        v_aa[:,p0:p1] = _trans(lib.unpack_tril(dvvvv[p0:p1]*.25), mo_a,
                               (nocca,nmoa,nocca,nmoa)).T

    v_ba = fswap.create_dataset('v_ab', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    dvvOP = fswap.create_dataset('dvvOP', (nvira_pair,noccb,nmob), 'f8',
                                 chunks=(int(min(blksize_a,4e8/nmob)),1,nmob))
    for i in range(noccb):
        buf1 = numpy.empty((nmob,nvira,nvira))
        buf1[:noccb] = dOOvv[i] * .5
        buf1[noccb:] = dOVvv[i]
        buf1 = buf1.transpose(1,2,0) + buf1.transpose(2,1,0)
        dvvOP[:,i] = buf1[numpy.tril_indices(nvira)]
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(dvvVV[p0:p1] * .5)
        buf1[:,:noccb,:] = dvvOP[p0:p1] * .5
        v_ba[:,p0:p1] = _trans(buf1, mo_b, (0,nmob,0,nmob)).T
    dvvOO = dvvOV = None

    blksize_b = int(max_memory*.9e6/8/(nao_pair+nmob**2))
    blksize_b = min(nvirb_pair, max(ccsd.BLKMIN, blksize_b))
    chunks_b = (int(min(nao_pair, 4e8/blksize_b)), blksize_b)
    v_bb = fswap.create_dataset('v_bb', (nao_pair,nvirb_pair), 'f8',
                                chunks=chunks_b)
    for p0, p1 in lib.prange(0, nvirb_pair, blksize_b):
        v_bb[:,p0:p1] = _trans(lib.unpack_tril(dVVVV[p0:p1]*.25), mo_b,
                               (noccb,nmob,noccb,nmob)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # ---- pass 2 ----
    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    o_aa = fswap.create_dataset('o_aa', (nmoa,nocca,nao_pair), 'f8',
                                chunks=(nocca,nocca,blksize))
    o_ab = fswap.create_dataset('o_ab', (nmoa,nocca,nao_pair), 'f8',
                                chunks=(nocca,nocca,blksize))
    o_bb = fswap.create_dataset('o_bb', (nmob,noccb,nao_pair), 'f8',
                                chunks=(noccb,noccb,blksize))
    buf1 = numpy.zeros((nocca,nocca,nmoa,nmoa))
    buf1[:,:,:nocca,:nocca] = _cp(doooo) * .25
    buf1[:,:,nocca:,nocca:] = _cp(doovv) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_a, (0,nmoa,0,nmoa))
    o_aa[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((nocca,nocca,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dooOO) * .5
    buf1[:,:,:noccb,noccb:] = _cp(dooOV)
    buf1[:,:,noccb:,noccb:] = _cp(dooVV) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_b, (0,nmob,0,nmob))
    o_ab[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((noccb,noccb,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dOOOO) * .25
    buf1[:,:,noccb:,noccb:] = _cp(dOOVV) * .5
    buf1 = _trans(buf1.reshape(noccb**2,-1), mo_b, (0,nmob,0,nmob))
    o_bb[:noccb] = buf1.reshape(noccb,noccb,nao_pair)

    dovoo = numpy.asarray(dooov).transpose(2,3,0,1)
    dovOO = numpy.asarray(dOOov).transpose(2,3,0,1)
    dOVOO = numpy.asarray(dOOOV).transpose(2,3,0,1)
    # Rows nocca..nmoa of o_aa / o_ab are filled in slabs of nocca rows.
    for p0, p1 in lib.prange(nocca, nmoa, nocca):
        buf1 = numpy.zeros((nocca,p1-p0,nmoa,nmoa))
        buf1[:,:,:nocca,:nocca] = dovoo[:,p0-nocca:p1-nocca]
        buf1[:,:,nocca:,:nocca] = dovvo[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,:nocca,nocca:] = dovov[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,nocca:,nocca:] = dovvv[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        o_aa[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

        buf1 = numpy.zeros((nocca,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dovOO[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,:noccb] = dovVO[:,p0-nocca:p1-nocca]
        buf1[:,:,:noccb,noccb:] = dovOV[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,noccb:] = dovVV[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_ab[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

    for p0, p1 in lib.prange(noccb, nmob, noccb):
        buf1 = numpy.zeros((noccb,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dOVOO[:,p0-noccb:p1-noccb]
        buf1[:,:,noccb:,:noccb] = dOVVO[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,:noccb,noccb:] = dOVOV[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,noccb:,noccb:] = dOVVV[:,p0-noccb:p1-noccb]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*noccb,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_bb[p0:p1] = buf1.reshape(p1-p0,noccb,nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # ---- pass 3 ----
    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    dm2a = fsave.create_dataset('dm2aa+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    dm2b = fsave.create_dataset('dm2bb+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_aa[p0:p1]))
        buf1[:,:,:nocca] = o_aa[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        if p0 > 0:
            # Add this row block to the transposed part already written by
            # earlier blocks, and keep both copies consistent.
            buf1 = _cp(dm2a[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2a[:p0,p0:p1] = buf1
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2a[p0:p1] = buf2
        buf1 = buf2 = None

    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(_cp(v_bb[p0:p1]))
        buf1[:,:,:noccb] = o_bb[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        if p0 > 0:
            buf1 = _cp(dm2b[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2b[:p0,p0:p1] = buf1
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2b[p0:p1] = buf2
        buf1 = buf2 = None

    # The mixed-spin term is added to both outputs: transposed into dm2a
    # and as-is into dm2b.
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_ba[p0:p1]))
        buf1[:,:,:nocca] = o_ab[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        dm2a[:,p0:p1] = dm2a[:,p0:p1] + buf2.T
        dm2b[p0:p1] = dm2b[p0:p1] + buf2
        buf1 = buf2 = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)

    if incore:
        # ``dataset[()]`` reads the whole dataset into memory.  It replaces
        # the deprecated ``Dataset.value`` attribute, which newer h5py
        # releases no longer provide.
        return (fsave['dm2aa+ab'][()], fsave['dm2bb+ab'][()])
    else:
        return fsave
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density-matrix intermediates with integrals into Ioo/Ivv/Ivo/Xvo.

    Args:
        mycc: Solver object; ``stdout``, ``verbose`` and ``max_memory`` are
            read from it.
        t1, t2, l1, l2: Amplitudes.  ``t1.shape`` is taken as
            ``(nocc, nvir)``; the others are only forwarded to the
            ``ccsd_rdm`` helpers when ``d1`` / ``d2`` are not supplied.
        eris: Integral container (Chemist's notation).  Built with
            ``ccsd._ERIS(mycc)`` when omitted.
        d1: Optional ``(doo, dov, dvo, dvv)`` tuple; computed with
            ``ccsd_rdm.gamma1_intermediates`` when omitted.
        d2: Optional 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov,
            dovvv, dooov)``; when omitted the blocks are generated into a
            temporary HDF5 file by ``ccsd_rdm.gamma2_outcore``.

    Returns:
        ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  The first
        three are negated before returning and ``Ivo`` is added into
        ``Xvo``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Scratch file: holds transposed copies written in pass 1 and read
    # back in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T, d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # d_ovov := dovov + dovvo^T(last two axes), read in small slabs.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T, _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # ---- pass 1: blocked over the occupied index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T,
                       d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T,
                       eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T,
                       d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T,
                       eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T,
                       d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
            numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo,
            1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # ---- pass 2: blocked over the virtual index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # This loop runs over nvir, so the message reports nvir and the number
    # of virtual blocks (the original text reported nocc here).
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Offsets of rows p0..p1 in lower-triangular packed pair storage.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        for i in range(p0, p1):
            # Diagonal pairs (i, i) get half the weight of off-diagonal ones.
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv,
            1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo,
            1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo,
            1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo,
            1)
        eris_vvov = eris_vovv = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # Snapshot the key list first: members are deleted inside the loop
        # and must not be removed while the live key view is being iterated.
        for key in list(fd2intermediate.keys()):
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def kernel_ms0(fci, h1e, eri, norb, nelec, ci0=None, link_index=None,
               tol=None, lindep=None, max_cycle=None, max_space=None,
               nroots=None, davidson_only=None, pspace_size=None,
               max_memory=None, verbose=None, ecore=0, **kwargs):
    """Solve for the lowest eigenpair(s) with a symmetric (na x na) CI vector.

    Args:
        fci: Solver object.  Defaults for ``nroots``, ``davidson_only``,
            ``pspace_size``, ``conv_tol``, ``lindep``, ``max_cycle``,
            ``max_space``, ``max_memory`` and the logger settings are read
            from it, and its ``make_hdiag``, ``pspace``, ``eig``,
            ``make_precond``, ``absorb_h1e`` and ``contract_2e`` methods
            are called.  ``fci.spin`` must be None or 0.
        h1e, eri: One- and two-electron integrals (made C-contiguous here).
        norb, nelec: Forwarded to the ``fci`` methods and to ``_unpack``.
        ci0: Optional initial guess: a single array of ``na*na`` elements
            or an iterable of such arrays.
        link_index: Optional pre-built link table, passed to ``_unpack``.
        tol, lindep, max_cycle, max_space, max_memory, verbose: Overrides
            for the corresponding ``fci`` attributes.
        nroots, davidson_only, pspace_size: Overrides for the
            corresponding ``fci`` attributes.
        ecore: Constant added to every returned energy.
        **kwargs: Forwarded to ``fci.eig``.

    Returns:
        ``(e, c)``: energy/energies plus ``ecore`` and the CI vector(s)
        reshaped to ``(na, na)`` and passed through ``_check_``.  For
        ``nroots > 1`` ``c`` is a list.
    """
    if nroots is None:
        nroots = fci.nroots
    if davidson_only is None:
        davidson_only = fci.davidson_only
    if pspace_size is None:
        pspace_size = fci.pspace_size

    assert (fci.spin is None or fci.spin == 0)
    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)
    addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag,
                          max(pspace_size, nroots))
    if pspace_size > 0:
        pw, pv = fci.eig(h0)
    else:
        pw = pv = None

    if pspace_size >= na * na and ci0 is None and not davidson_only:
        # The degenerated wfn can break symmetry.  The davidson iteration
        # with proper initial guess doesn't have this issue
        if na * na == 1:
            return pw[0] + ecore, pv[:, 0].reshape(1, 1)
        elif nroots > 1:
            civec = numpy.empty((nroots, na * na))
            civec[:, addr] = pv[:, :nroots].T
            civec = civec.reshape(nroots, na, na)
            try:
                return pw[:nroots] + ecore, [_check_(ci) for ci in civec]
            except ValueError:
                # _check_ rejected one of the vectors: fall through to the
                # iterative solver below.
                pass
        elif abs(pw[0] - pw[1]) > 1e-12:
            civec = numpy.empty((na * na))
            civec[addr] = pv[:, 0]
            civec = civec.reshape(na, na)
            civec = lib.transpose_sum(civec) * .5
            # direct diagonalization may lead to triplet ground state
            ##TODO: optimize initial guess.  Using pspace vector as initial
            ## guess may have spin problems.  The 'ground state' of pspace
            ## vector may have different spin state to the true ground state.
            try:
                return pw[0] + ecore, _check_(civec.reshape(na, na))
            except ValueError:
                # Same fallback as above.
                pass

    precond = fci.make_precond(hdiag, pw, pv, addr)
    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)

    def hop(c):
        hc = fci.contract_2e(h2e, c.reshape(na, na), norb, nelec, link_index)
        return hc.ravel()

    #TODO: check spin of initial guess
    if ci0 is None:
        if hasattr(fci, 'get_init_guess'):
            ci0 = fci.get_init_guess(norb, nelec, nroots, hdiag)
        else:
            ci0 = []
            for i in range(nroots):
                # The shape must be passed as a tuple: numpy.zeros(na, na)
                # would treat the second ``na`` as a dtype and raise
                # TypeError.
                x = numpy.zeros((na, na))
                if addr[i] == 0:
                    x[0, 0] = 1
                else:
                    # Symmetric combination of the determinant pair
                    # (addra, addrb) and (addrb, addra).
                    addra = addr[i] // na
                    addrb = addr[i] % na
                    x[addra, addrb] = x[addrb, addra] = numpy.sqrt(.5)
                ci0.append(x.ravel())
    else:
        if isinstance(ci0, numpy.ndarray) and ci0.size == na * na:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    if tol is None:
        tol = fci.conv_tol
    if lindep is None:
        lindep = fci.lindep
    if max_cycle is None:
        max_cycle = fci.max_cycle
    if max_space is None:
        max_space = fci.max_space
    if max_memory is None:
        max_memory = fci.max_memory
    if verbose is None:
        verbose = logger.Logger(fci.stdout, fci.verbose)
    #e, c = lib.davidson(hop, ci0, precond, tol=fci.conv_tol, lindep=fci.lindep)
    e, c = fci.eig(hop, ci0, precond, tol=tol, lindep=lindep,
                   max_cycle=max_cycle, max_space=max_space, nroots=nroots,
                   max_memory=max_memory, verbose=verbose, follow_state=True,
                   **kwargs)
    if nroots > 1:
        return e + ecore, [_check_(ci.reshape(na, na)) for ci in c]
    else:
        return e + ecore, _check_(c.reshape(na, na))
def _rdm2_mo2ao(mycc, d2, dm1, mo_coeff, fsave=None):
    """Transform the blocks in ``d2`` (plus ``dm1`` terms) and store the
    result, column-chunked, in the HDF5 group ``'dm2'`` of ``fsave``.

    Judging by the name and the inline comments this is the MO -> AO
    transformation of a two-particle density matrix; the index transform
    itself is done by the C driver ``AO2MOnr_e2_drv`` called in ``_trans``.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; ``dovvv`` is read through ``_load_from_h5g``.
        dm1: array added to / subtracted from slices of the first-pass
            buffer.
        mo_coeff: ``(nao, nmo)`` coefficient matrix.
        fsave: an open, writable h5py file/group, or ``None`` to use a
            temporary file.

    Returns:
        ``fsave`` when one was passed in; otherwise the
        ``(nao_pair, nao_pair)`` array read back from the temporary file.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if fsave is None:
        _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fsave = h5py.File(_dm2file.name, 'w')
    else:
        _dm2file = None
    # NOTE(review): time.clock was removed in Python 3.8; kept because the
    # rest of the file's timing calls are not visible from this block.
    time1 = time.clock(), time.time()
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2
    nocc, nvir = dovov.shape[:2]
    nov = nocc * nvir
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2
    mo_coeff = numpy.asarray(mo_coeff, order='F')

    def _trans(vin, orbs_slice, out=None):
        # Run the C transformation driver on each row of ``vin``, writing
        # ``nao_pair`` values per row into ``out``.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
        pao_loc = ctypes.POINTER(ctypes.c_void_p)()
        fdrv(_ccsd.libcc.AO2MOtranse2_nr_s1, _ccsd.libcc.CCmmm_transpose_sum,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int * 4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open the swap file explicitly for writing: h5py >= 3 defaults to
    # read-only when no mode is given, which would make create_group fail.
    fswap = h5py.File(_tmpfile.name, 'w')
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(1, int(max_memory * 1e6 / 8 /
                         (nmo * nao_pair + nmo**3 + nvir**3)))
    iobuflen = int(256e6 / 8 / (blksize * nmo))
    log.debug1('_rdm2_mo2ao pass 1: blksize = %d, iobuflen = %d',
               blksize, iobuflen)
    fswap.create_group('o')  # for h5py old version
    pool1 = numpy.empty((blksize, nmo, nmo, nmo))
    pool2 = numpy.empty((blksize, nmo, nao_pair))
    bufd_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        buf1 = pool1[:p1 - p0]
        buf1[:, :nocc, :nocc, :nocc] = doooo[p0:p1]
        buf1[:, :nocc, :nocc, nocc:] = dooov[p0:p1]
        buf1[:, :nocc, nocc:, :nocc] = 0
        buf1[:, :nocc, nocc:, nocc:] = doovv[p0:p1]
        buf1[:, nocc:, :nocc, :nocc] = 0
        buf1[:, nocc:, :nocc, nocc:] = dovov[p0:p1]
        buf1[:, nocc:, nocc:, :nocc] = dovvo[p0:p1]
        d_ovvv = bufd_ovvv[:p1 - p0]
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        buf1[:, nocc:, nocc:, nocc:] = d_ovvv
        for i in range(p0, p1):
            buf1[i - p0, i, :, :] += dm1
            buf1[i - p0, :, :, i] -= dm1 * .5
        buf2 = pool2[:p1 - p0].reshape(-1, nao_pair)
        _trans(buf1.reshape(-1, nmo**2), (0, nmo, 0, nmo), buf2)
        ao2mo.outcore._transpose_to_h5g(fswap, 'o/%d' % istep, buf2, iobuflen)
    pool1 = pool2 = bufd_ovvv = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    fswap.create_group('v')  # for h5py old version
    pool1 = numpy.empty((blksize * nvir, nao_pair))
    pool2 = numpy.empty((blksize * nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nvir_pair, blksize * nvir)):
        buf1 = _cp(dvvvv[p0:p1])
        buf2 = lib.unpack_tril(buf1, out=pool2[:p1 - p0])
        buf1 = _trans(buf2, (nocc, nmo, nocc, nmo), out=pool1[:p1 - p0])
        ao2mo.outcore._transpose_to_h5g(fswap, 'v/%d' % istep, buf1, iobuflen)
    pool1 = pool2 = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(nao, int(max_memory * 1e6 / 8 / (nao_pair + nmo**2)))
    iobuflen = int(256e6 / 8 / blksize)
    log.debug1('_rdm2_mo2ao pass 3: blksize = %d, iobuflen = %d',
               blksize, iobuflen)
    gsave = fsave.create_group('dm2')
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        gsave.create_dataset(str(istep), (nao_pair, p1 - p0), 'f8')
    # Positions of the i == j entries in the packed pair index.
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    pool1 = numpy.empty((blksize, nmo, nmo))
    pool2 = numpy.empty((blksize, nvir_pair))
    pool3 = numpy.empty((blksize, nvir, nvir))
    pool4 = numpy.empty((blksize, nao_pair))
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        buf1 = pool1[:p1 - p0]
        ao2mo.outcore._load_from_h5g(fswap['o'], p0, p1,
                                     buf1[:, :nocc].reshape(p1 - p0, -1))
        buf2 = ao2mo.outcore._load_from_h5g(fswap['v'], p0, p1,
                                            pool2[:p1 - p0])
        buf3 = lib.unpack_tril(buf2, out=pool3[:p1 - p0])
        buf1[:, nocc:, nocc:] = buf3
        buf1[:, nocc:, :nocc] = 0
        buf2 = _trans(buf1, (0, nmo, 0, nmo), out=pool4[:p1 - p0])
        ic = 0
        idx = diagidx[diagidx < p1]
        if p0 > 0:
            # Symmetrise against the rows already written for this column
            # chunk by earlier iterations.
            buf1 = _cp(gsave[str(istep)][:p0])
            for i0, i1 in prange(0, p1 - p0, BLKSIZE):
                for j0, j1, in prange(0, p0, BLKSIZE):
                    buf1[j0:j1, i0:i1] += buf2[i0:i1, j0:j1].T
                    buf2[i0:i1, j0:j1] = buf1[j0:j1, i0:i1].T
            buf1[:, idx[p0 <= idx] - p0] *= .5
            gsave[str(istep)][:p0] = buf1
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        buf2[:, idx] *= .5
        for ic, (i0, i1) in enumerate(prange(0, nao_pair, blksize)):
            gsave[str(ic)][p0:p1] = buf2[:, i0:i1]
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    del fswap['o']
    del fswap['v']
    fswap.close()
    _tmpfile = None
    time1 = log.timer_debug1('_rdm2_mo2ao cleanup', *time1)
    if _dm2file is not None:
        # The 'dm2' datasets were created above with nao_pair rows and
        # together span nao_pair columns, so the in-memory copy must be
        # (nao_pair, nao_pair).  The previous code sized it with nvir_pair.
        dm2 = numpy.empty((nao_pair, nao_pair))
        ao2mo.outcore._load_from_h5g(fsave['dm2'], 0, nao_pair, dm2)
        fsave.close()
        _dm2file = None
        return dm2
    else:
        return fsave
def make_hdiag(h1e, eri, norb, nelec):
    """Return the flattened diagonal from ``direct_spin1.make_hdiag`` after
    averaging it with its own transpose.

    The diagonal is viewed as a square ``(na, na)`` matrix with
    ``na = int(sqrt(size))``; symmetrising it reduces numerical error.
    """
    diag = direct_spin1.make_hdiag(h1e, eri, norb, nelec)
    nstr = int(numpy.sqrt(diag.size))
    square = diag.reshape(nstr, nstr)
    # In-place M + M.T, then halve into a new array to obtain the average.
    summed = lib.transpose_sum(square, inplace=True)
    averaged = summed * .5
    return averaged.ravel()
def update_amps(cc, t1, t2, eris):
    """Compute one update of the amplitudes and return ``(t1new, t2new)``.

    ``t1`` has shape ``(nocc, nvir)``; ``t2`` is reshaped below as a
    ``(nocc, nocc, nvir, nvir)`` array.  ``eris`` supplies ``fock`` and the
    integral blocks ``oooo``, ``ooov``, ``ovov``, ``oovv`` and ``ovvv``
    (the latter in packed-triangular form, unpacked here).  The lines
    starting with ``#:`` give the einsum expression that the following
    ``lib.dot`` calls evaluate.

    Buffers are recycled under new names (``tau`` -> ``wOVov``,
    ``t2iajb`` -> ``tau`` -> ``tmp`` ...) and most products accumulate in
    place, so statement order matters.
    NOTE(review): presumably this is the restricted CCSD amplitude
    equation -- confirm against the caller.
    """
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()

    # Occupied-occupied and virtual-virtual intermediates start from the
    # Fock blocks with the diagonal zeroed.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    time1 = log.timer_debug1('woooo', *time0)

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = unpack_tril(eris_ovvv.reshape(nov,-1))
    eris_ovvv = eris_ovvv.reshape(nocc,nvir,nvir,nvir)

    fvv += numpy.einsum('kc,kcba->ab', 2*t1, eris_ovvv)
    fvv += numpy.einsum('kc,kbca->ab', -t1, eris_ovvv)

    #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov)
    #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
    woVoV = lib.dot(_cp(eris_ooov.transpose(0,1,3,2).reshape(-1,nocc)), t1)
    woVoV = woVoV.reshape(nocc,nocc,nvir,nvir)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kcdb->kijb', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,kijb->jiba', -t1, tmp)
    tau = make_tau(t2, t1, t1)
    tmp = numpy.empty((nocc,nocc,nocc,nvir))
    for k in range(nocc):
        tmp[k] = lib.dot(tau.reshape(-1,nvir**2),
                         eris_ovvv[k].reshape(-1,nvir)).reshape(nocc,nocc,nvir).transpose(1,0,2)
        lib.dot(t1, eris_ovvv[k].reshape(nvir,-1), -1, woVoV[k].reshape(nocc,-1), 1)
    lib.dot(tmp.reshape(nocc,-1).T, t1, -1, t2new.reshape(-1,nvir), 1)
    tmp = None

    #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
    #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo, t1)
    # The tau buffer is recycled as wOVov; the next dot overwrites it.
    wOVov, tau = tau, None
    lib.dot(_cp(_cp(eris.ooov).transpose(0,2,3,1).reshape(-1,nocc)), t1,
            -1, wOVov.reshape(-1,nvir))
    #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
    lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wOVov.reshape(nocc,-1), 1)
    for i in range(nocc):
        t2new[i] += wOVov[i].transpose(0,2,1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
    theta = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        theta[i] = t2[i].transpose(0,2,1) * 2
        theta[i] -= t2[i]
        lib.dot(_cp(theta[i].transpose(0,2,1).reshape(nocc,-1)),
                eris_ovvv[i].reshape(-1,nvir), 1, t1new, 1)
    eris_ovvv = None

    eris_ovov = _cp(eris.ovov)

    for i in range(nocc):
        t2new[i] += eris_ovov[i].transpose(1,0,2) * .5

    fov += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
    fov -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
    #: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
    t1new += numpy.einsum('jb,jiab->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov, theta)
    lib.dot(_cp(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1)),
            theta.reshape(-1,nvir), -1, t1new, 1)
    eris_ooov = None

    #: wOVov += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
    wOVov = _cp(wOVov.transpose(0,3,1,2))
    eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov)).reshape(nocc,nvir,-1,nvir)
    eris_OvoV = _cp(eris_OVov.transpose(0,3,2,1))
    wOVov += eris_OVov
    t2iajb = t2.transpose(0,2,1,3).copy()
    #: wOVov[j0:j1] -= .5 * numpy.einsum('iakc,jkbc->jbai', eris_ovov, t2)
    lib.dot(t2iajb.reshape(-1,nov), eris_OvoV.reshape(nov,-1),
            -.5, wOVov.reshape(nov,-1), 1)
    # t2iajb's storage is rewritten in place to hold tau.
    tau, t2iajb = t2iajb, None
    for i in range(nocc):
        tau[i] = tau[i]*2 - t2[i].transpose(2,0,1)
        tau[i] -= numpy.einsum('a,jb->bja', t1[i]*2, t1)
    #: wOVov += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
    lib.dot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
            .5, wOVov.reshape(nov,-1), 1)

    #theta = t2 * 2 - t2.transpose(0,1,3,2)
    #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
    tmp, tau = tau, None
    theta = _cp(theta.transpose(0,3,1,2).reshape(nov,-1))
    lib.dot(wOVov.reshape(nov,-1), theta.T, 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,0,2)
    tmp = wOVov = eris_OvoV = eris_OVov = None

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
    for i in range(nocc):
        tau = numpy.einsum('a,jb->jab', t1[i]*.5, t1) + t2[i]
        theta = tau.transpose(0,2,1)*2 - tau
        lib.dot(_cp(eris_ovov[i].transpose(1,2,0)).reshape(nocc,-1),
                theta.reshape(nocc,-1).T, 1, foo, 1)
        lib.dot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
    tau = theta = None

    eris_oovv = _cp(eris.oovv)
    #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
    #:t2new += numpy.einsum('ka,jibk->jiab', -t1, tmp)
    #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
    #:t2new += numpy.einsum('ka,jibk->jiba', -t1, tmp)
    for j in range(nocc):
        tmp = lib.dot(t1, eris_oovv[j].reshape(-1,nvir).T)
        tmp = _cp(tmp.reshape(nocc,nocc,nvir).transpose(0,2,1))
        t2new[j] += lib.dot(tmp.reshape(-1,nocc), t1, -1).reshape(nocc,nvir,nvir).transpose(0,2,1)
        lib.dot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
        lib.dot(tmp.reshape(-1,nocc), t1, -1, t2new[j].reshape(-1,nvir), 1)
    tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
    t1new += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
    t1new += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
    woVoV -= eris_oovv
    woVoV = woVoV.transpose(1,3,0,2).copy()
    eris_oVOv = _cp(eris_ovov.transpose(0,3,2,1))
    eris_oOvV = _cp(eris_ovov.transpose(0,2,1,3))

    tau = make_tau(t2, t1, t1)
    #: woooo += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
    lib.dot(eris_oOvV.reshape(-1,nvir**2), tau.reshape(-1,nvir**2).T, 1, woooo.reshape(nocc**2,-1), 1)
    #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo, tau)
    lib.dot(woooo.reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir), .5, t2new.reshape(nocc*nocc,-1), 1)
    for i in range(nocc):
        tau[i] -= t2[i] * .5
    #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
    tau = _cp(tau.transpose(0,3,1,2))
    lib.dot(tau.reshape(-1,nov), eris_oVOv.reshape(-1,nov).T, 1, woVoV.reshape(nov,-1), 1)
    # Drop references to large arrays (taubuf is not used elsewhere in
    # this function).
    eris_oovv = eris_ovov = eris_oOvV = taubuf = None

    # tau and eris_oVOv storage is recycled as scratch space.
    tmp, tau = tau, None
    t2ibja, eris_oVOv = eris_oVOv, None
    for i in range(nocc):
        t2ibja[i] = t2[i].transpose(2,0,1)
    #: t2new += numpy.einsum('ibkc,kcja->ijab', woVoV, t2ibja)
    lib.dot(woVoV.reshape(nov,-1), t2ibja.reshape(-1,nov), 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,2,0)

    #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
    t2iajb = t2ibja
    for i in range(nocc):
        t2iajb[i] = t2[i].transpose(1,0,2)
    lib.dot(woVoV.reshape(nov,-1), t2iajb.reshape(-1,nov), 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,0,2)
    t2ibja = t2iajb = woVoV = tmp = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the i >= j pairs are kept, packed along the first axis.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij] = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    cc.add_wvvVV_(t1, t2, eris, t2new_tril)
    time1 = log.timer_debug1('vvvv', *time1)

    # Divide each packed (i, j) block by eia[i, a] + eia[j, b].
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    p0 = 0
    for i in range(nocc):
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular storage back to the full (i, j, a, b) array.
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

    #** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new /= eia
    #** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
def kernel_ms0(fci, h1e, eri, norb, nelec, ci0=None, link_index=None,
               tol=None, lindep=None, max_cycle=None, max_space=None,
               nroots=None, davidson_only=None, pspace_size=None,
               max_memory=None, verbose=None, ecore=0, **kwargs):
    """Driver that returns the lowest ``nroots`` eigenpairs found with the
    solver object ``fci``.

    Options passed as ``None`` default to attributes of ``fci``.  If the
    "pspace" block covers the complete ``na*na`` space (and neither an
    initial guess nor ``davidson_only`` was requested) its eigenvectors
    are returned directly when they pass ``_check_``; in every other case,
    including ``fci.pspace`` raising ``NotImplementedError``, the
    iterative ``fci.eig`` is used.

    Returns:
        ``(e + ecore, c)`` where ``c`` is an ``(na, na)`` array, or a list
        of such arrays when more than one root is requested.
    """
    nroots = fci.nroots if nroots is None else nroots
    davidson_only = (fci.davidson_only if davidson_only is None
                     else davidson_only)
    pspace_size = fci.pspace_size if pspace_size is None else pspace_size
    if max_memory is None:
        max_memory = fci.max_memory - lib.current_memory()[0]
    log = logger.new_logger(fci, verbose)

    assert (fci.spin is None or fci.spin == 0)
    assert (0 <= numpy.sum(nelec) <= norb * 2)

    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    ndet = na * na

    if max_memory < na**2 * 6 * 8e-6:
        log.warn('Not enough memory for FCI solver. '
                 'The minimal requirement is %.0f MB', na**2 * 60e-6)

    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)
    nroots = min(hdiag.size, nroots)

    try:
        addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag,
                              max(pspace_size, nroots))
        if pspace_size > 0:
            pw, pv = fci.eig(h0)
        else:
            pw = pv = None

        # Degenerate wavefunctions can break symmetry in the direct route;
        # the iterative route with a proper initial guess does not suffer
        # from that, so every failure below simply falls through to it.
        if pspace_size >= ndet and ci0 is None and not davidson_only:
            if ndet == 1:
                return pw[0] + ecore, pv[:, 0].reshape(1, 1)

            if nroots > 1:
                vecs = numpy.empty((nroots, ndet))
                vecs[:, addr] = pv[:, :nroots].T
                vecs = vecs.reshape(nroots, na, na)
                try:
                    return pw[:nroots] + ecore, [_check_(v) for v in vecs]
                except ValueError:
                    pass
            elif abs(pw[0] - pw[1]) > 1e-12:
                vec = numpy.empty((ndet))
                vec[addr] = pv[:, 0]
                vec = lib.transpose_sum(vec.reshape(na, na)) * .5
                # Direct diagonalisation may give a state of the wrong
                # spin; _check_ rejects it with ValueError.
                try:
                    return pw[0] + ecore, _check_(vec.reshape(na, na))
                except ValueError:
                    pass
    except NotImplementedError:
        addr = [0]
        pw = pv = None

    precond = fci.make_precond(hdiag, pw, pv, addr)
    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)

    def hop(c):
        return fci.contract_2e(h2e, c.reshape(na, na), norb, nelec,
                               link_index).ravel()

    def _solver_guess():
        # Delegate to the solver's own guess generator.
        return fci.get_init_guess(norb, nelec, nroots, hdiag)

    def _unit_guess():
        # One symmetric unit vector per root, placed at the pspace
        # addresses.
        guesses = []
        for k in range(nroots):
            row = addr[k] // na
            col = addr[k] % na
            trial = numpy.zeros((na, na))
            if row == col:
                trial[row, col] = 1
            else:
                trial[row, col] = trial[col, row] = numpy.sqrt(.5)
            guesses.append(trial.ravel())
        return guesses

    # The guess may be a callable that is evaluated lazily by fci.eig.
    if ci0 is None:
        if callable(getattr(fci, 'get_init_guess', None)):
            ci0 = _solver_guess
        else:
            ci0 = _unit_guess
    elif not callable(ci0):
        if isinstance(ci0, numpy.ndarray) and ci0.size == ndet:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    tol = fci.conv_tol if tol is None else tol
    lindep = fci.lindep if lindep is None else lindep
    max_cycle = fci.max_cycle if max_cycle is None else max_cycle
    max_space = fci.max_space if max_space is None else max_space
    tol_residual = getattr(fci, 'conv_tol_residual', None)

    with lib.with_omp_threads(fci.threads):
        e, c = fci.eig(hop, ci0, precond, tol=tol, lindep=lindep,
                       max_cycle=max_cycle, max_space=max_space,
                       nroots=nroots, max_memory=max_memory, verbose=log,
                       follow_state=True, tol_residual=tol_residual,
                       **kwargs)

    if nroots <= 1:
        return e + ecore, _check_(c.reshape(na, na))
    return e + ecore, [_check_(v.reshape(na, na)) for v in c]
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    """Build one ``h1ao + veff`` array per atom in ``atmlst``.

    For every atom the function combines gradient-type one-electron
    integrals (``cint1e_ipkin_sph``, ``cint1e_ipnuc_sph``,
    ``cint1e_iprinv_sph``), Coulomb/exchange terms from ``cint2e_ip1_sph``
    contracted with the density built from the occupied orbitals, and an
    exchange-correlation part integrated on ``mf.grids`` (LDA or GGA).
    NOTE(review): presumably these are the first-order (nuclear
    displacement) Hamiltonian matrices used by a DFT Hessian -- confirm
    with the caller.

    Args:
        mf: mean-field object; ``mol``, ``_numint``, ``xc``, ``grids``,
            ``stdout`` and ``verbose`` are read from it.
        mo_coeff: ``(nao, nmo)`` orbital coefficients.
        mo_occ: occupations; orbitals with ``mo_occ > 0`` define ``dm0``.
        chkfile: if given, each result is saved under ``'scf_h1ao/<ia>'``
            instead of being kept in memory.
        atmlst: atom indices to process; defaults to all atoms of ``mol``.
        verbose: a ``logger.Logger`` to reuse, otherwise a logger is built
            from ``mf`` (the logger is not used further in this function).

    Returns:
        The list of per-atom arrays when ``chkfile`` is ``None``, else
        ``chkfile`` itself.

    Raises:
        NotImplementedError: for any xc type other than 'LDA' or 'GGA'.
    """
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    mocc = mo_coeff[:,mo_occ>0]
    # Density matrix from the occupied orbitals (factor 2 applied here).
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching the xc backend does not
    # modify mf._numint.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to libxc when xcfun cannot handle this functional.
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    # Fixed value handed to ni.block_loop below.
    max_memory = 4000

    h1a =-(mol.intor('cint1e_ipkin_sph', comp=3) +
           mol.intor('cint1e_ipnuc_sph', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for i0, ia in enumerate(atmlst):
        # Shell range (shl0, shl1) and AO range (p0, p1) of atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]

        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        h1ao = h1ao + h1ao.transpose(0,2,1)

        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        if abs(hyb) > 1e-10:
            # Hybrid functional: exchange terms are needed, scaled by hyb.
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb*.5*vk1
            veff[:,p0:p1] += vj2 - hyb*.5*vk2
        else:
            vj1, vj2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1
            veff[:,p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:,:,p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight*frr*rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:,:,p0:p1], weight*vrho)
                aow[:,:,p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                half = aow = aow1 = None

        elif xctype == 'GGA':
            def get_wv(rho, rho1, weight, vxc, fxc):
                # Assemble the 4-component weight array (one value row and
                # three gradient rows) from the xc derivatives and the
                # density response rho1.
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4,ngrid))
                wv[0] = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:] = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                # XX, XY, ... ZZ are module-level indices into the
                # derivative axis of ``ao``.
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1,XX,XY,XZ],:,p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2,YX,YY,YZ],:,p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3,ZX,ZY,ZZ],:,p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:,p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[2], dm0[:,p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[3], dm0[:,p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)

                # One symmetrised contribution per Cartesian direction.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # Terms restricted to the AO rows p0:p1 of this atom.
                wv = numpy.empty_like(rho)
                wv[0] = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0,p0:p1] -= lib.dot(ao[1,:,p0:p1].T.copy(), aow)
                veff[1,p0:p1] -= lib.dot(ao[2,:,p0:p1].T.copy(), aow)
                veff[2,p0:p1] -= lib.dot(ao[3,:,p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX,XY,XZ],:,p0:p1], wv[1:4])
                veff[0,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX,YY,YZ],:,p0:p1], wv[1:4])
                veff[1,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX,ZY,ZZ],:,p0:p1], wv[1:4])
                veff[2,p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        # Symmetrise each of the three components over the two AO indices.
        veff = veff + veff.transpose(0,2,1)

        if chkfile is None:
            h1aos.append(h1ao+veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao+veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile
def update_amps(cc, t1, t2, eris):
    """Compute one update of the amplitudes and return ``(t1new, t2new)``.

    ``t1`` has shape ``(nocc, nvir)``; ``t2`` is reshaped below as a
    ``(nocc, nocc, nvir, nvir)`` array.  ``eris`` supplies ``fock`` and the
    integral blocks ``oooo``, ``ooov``, ``ovov``, ``oovv`` and ``ovvv``
    (the latter in packed-triangular form, unpacked here).  The lines
    starting with ``#:`` give the einsum expression that the following
    ``lib.dot`` calls evaluate.

    The ``cc.add_wvvVV_`` contribution is computed first, into a packed
    ``i >= j`` buffer, and copied into the lower triangle of ``t2new``;
    the final loop adds the ``(j, i)`` transpose and divides by the
    orbital-energy denominators.  Buffers are recycled under new names and
    most products accumulate in place, so statement order matters.
    NOTE(review): presumably this is the restricted CCSD amplitude
    equation -- confirm against the caller.
    """
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    t2new_tril = numpy.zeros((nocc * (nocc + 1) // 2, nvir, nvir))
    cc.add_wvvVV_(t1, t2, eris, t2new_tril)
    time1 = log.timer_debug1('vvvv', *time0)
    ij = 0
    for i in range(nocc):
        for j in range(i + 1):
            t2new[i, j] = t2new_tril[ij]
            ij += 1
        # The diagonal block is halved because the final symmetrisation
        # loop adds it to its own transpose.
        t2new[i, i] *= .5
    t2new_tril = None

    #** make_inter_F
    fov = fock[:nocc, nocc:].copy()
    # Added before fov is modified in place further down.
    t1new += fov

    foo = fock[:nocc, :nocc].copy()
    foo[range(nocc), range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc, nocc:], t1)

    fvv = fock[nocc:, nocc:].copy()
    fvv[range(nvir), range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc, nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2 * t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape((nocc, ) * 4)
    woooo = lib.transpose_sum(woooo.reshape(nocc * nocc, -1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2, -1)
    woooo = _cp(woooo.reshape(nocc, nocc, nocc, nocc).transpose(0, 2, 1, 3))
    time1 = log.timer_debug1('woooo', *time0)

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = lib.unpack_tril(eris_ovvv.reshape(nov, -1))
    eris_ovvv = eris_ovvv.reshape(nocc, nvir, nvir, nvir)

    fvv += numpy.einsum('kc,kcba->ab', 2 * t1, eris_ovvv)
    fvv += numpy.einsum('kc,kbca->ab', -t1, eris_ovvv)

    #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov)
    #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
    woVoV = lib.dot(_cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)), t1)
    woVoV = woVoV.reshape(nocc, nocc, nvir, nvir)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kcdb->kijb', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,kijb->jiba', -t1, tmp)
    tau = make_tau(t2, t1, t1)
    tmp = numpy.empty((nocc, nocc, nocc, nvir))
    for k in range(nocc):
        tmp[k] = lib.dot(tau.reshape(-1, nvir**2), eris_ovvv[k].reshape(
            -1, nvir)).reshape(nocc, nocc, nvir).transpose(1, 0, 2)
        lib.dot(t1, eris_ovvv[k].reshape(nvir, -1), -1,
                woVoV[k].reshape(nocc, -1), 1)
    lib.dot(tmp.reshape(nocc, -1).T, t1, -1, t2new.reshape(-1, nvir), 1)
    tmp = None

    #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
    #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo, t1)
    # The tau buffer is recycled as wOVov; the next dot overwrites it.
    wOVov, tau = tau, None
    lib.dot(_cp(_cp(eris.ooov).transpose(0, 2, 3, 1).reshape(-1, nocc)), t1,
            -1, wOVov.reshape(-1, nvir))
    #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
    lib.dot(t1, eris_ovvv.reshape(-1, nvir).T, 1, wOVov.reshape(nocc, -1), 1)
    for i in range(nocc):
        t2new[i] += wOVov[i].transpose(0, 2, 1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
    theta = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        theta[i] = t2[i].transpose(0, 2, 1) * 2
        theta[i] -= t2[i]
        lib.dot(_cp(theta[i].transpose(0, 2, 1).reshape(nocc, -1)),
                eris_ovvv[i].reshape(-1, nvir), 1, t1new, 1)
    eris_ovvv = None

    eris_ovov = _cp(eris.ovov)

    for i in range(nocc):
        t2new[i] += eris_ovov[i].transpose(1, 0, 2) * .5

    fov += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
    fov -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
    #: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
    t1new += numpy.einsum('jb,jiab->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov, theta)
    lib.dot(_cp(eris_ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)),
            theta.reshape(-1, nvir), -1, t1new, 1)
    eris_ooov = None

    #: wOVov += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
    wOVov = _cp(wOVov.transpose(0, 3, 1, 2))
    eris_OVov = lib.transpose(eris_ovov.reshape(-1, nov)).reshape(
        nocc, nvir, -1, nvir)
    eris_OvoV = _cp(eris_OVov.transpose(0, 3, 2, 1))
    wOVov += eris_OVov
    t2iajb = t2.transpose(0, 2, 1, 3).copy()
    #: wOVov[j0:j1] -= .5 * numpy.einsum('iakc,jkbc->jbai', eris_ovov, t2)
    lib.dot(t2iajb.reshape(-1, nov), eris_OvoV.reshape(nov, -1), -.5,
            wOVov.reshape(nov, -1), 1)
    # t2iajb's storage is rewritten in place to hold tau.
    tau, t2iajb = t2iajb, None
    for i in range(nocc):
        tau[i] = tau[i] * 2 - t2[i].transpose(2, 0, 1)
        tau[i] -= numpy.einsum('a,jb->bja', t1[i] * 2, t1)
    #: wOVov += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
    lib.dot(tau.reshape(-1, nov), eris_OVov.reshape(nov, -1), .5,
            wOVov.reshape(nov, -1), 1)

    #theta = t2 * 2 - t2.transpose(0,1,3,2)
    #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
    tmp, tau = tau, None
    theta = _cp(theta.transpose(0, 3, 1, 2).reshape(nov, -1))
    lib.dot(wOVov.reshape(nov, -1), theta.T, 1, tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 0, 2)
    tmp = wOVov = eris_OvoV = eris_OVov = None

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
    for i in range(nocc):
        tau = numpy.einsum('a,jb->jab', t1[i] * .5, t1) + t2[i]
        theta = tau.transpose(0, 2, 1) * 2 - tau
        lib.dot(
            _cp(eris_ovov[i].transpose(1, 2, 0)).reshape(nocc, -1),
            theta.reshape(nocc, -1).T, 1, foo, 1)
        lib.dot(
            theta.reshape(-1, nvir).T, eris_ovov[i].reshape(nvir, -1).T, -1,
            fvv, 1)
    tau = theta = None

    eris_oovv = _cp(eris.oovv)
    #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
    #:t2new += numpy.einsum('ka,jibk->jiab', -t1, tmp)
    #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
    #:t2new += numpy.einsum('ka,jibk->jiba', -t1, tmp)
    for j in range(nocc):
        tmp = lib.dot(t1, eris_oovv[j].reshape(-1, nvir).T)
        tmp = _cp(tmp.reshape(nocc, nocc, nvir).transpose(0, 2, 1))
        t2new[j] += lib.dot(tmp.reshape(-1, nocc), t1,
                            -1).reshape(nocc, nvir, nvir).transpose(0, 2, 1)
        lib.dot(t1, eris_ovov[j].reshape(-1, nvir).T, 1,
                tmp.reshape(nocc, -1))
        lib.dot(tmp.reshape(-1, nocc), t1, -1, t2new[j].reshape(-1, nvir), 1)
    tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
    t1new += numpy.einsum('jb,iajb->ia', 2 * t1, eris_ovov)
    t1new += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
    woVoV -= eris_oovv
    woVoV = woVoV.transpose(1, 3, 0, 2).copy()
    eris_oVOv = _cp(eris_ovov.transpose(0, 3, 2, 1))
    eris_oOvV = _cp(eris_ovov.transpose(0, 2, 1, 3))

    tau = make_tau(t2, t1, t1)
    #: woooo += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
    lib.dot(eris_oOvV.reshape(-1, nvir**2), tau.reshape(-1, nvir**2).T, 1,
            woooo.reshape(nocc**2, -1), 1)
    #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo, tau)
    lib.dot(
        woooo.reshape(-1, nocc * nocc).T, tau.reshape(-1, nvir * nvir), .5,
        t2new.reshape(nocc * nocc, -1), 1)
    for i in range(nocc):
        tau[i] -= t2[i] * .5
    #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
    tau = _cp(tau.transpose(0, 3, 1, 2))
    lib.dot(tau.reshape(-1, nov), eris_oVOv.reshape(-1, nov).T, 1,
            woVoV.reshape(nov, -1), 1)
    # Drop references to large arrays (taubuf is not used elsewhere in
    # this function).
    eris_oovv = eris_ovov = eris_oOvV = taubuf = None

    # tau and eris_oVOv storage is recycled as scratch space.
    tmp, tau = tau, None
    t2ibja, eris_oVOv = eris_oVOv, None
    for i in range(nocc):
        t2ibja[i] = t2[i].transpose(2, 0, 1)
    #: t2new += numpy.einsum('ibkc,kcja->ijab', woVoV, t2ibja)
    lib.dot(woVoV.reshape(nov, -1), t2ibja.reshape(-1, nov), 1,
            tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 2, 0)

    #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
    t2iajb = t2ibja
    for i in range(nocc):
        t2iajb[i] = t2[i].transpose(1, 0, 2)
    lib.dot(woVoV.reshape(nov, -1), t2iajb.reshape(-1, nov), 1,
            tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 0, 2)
    t2ibja = t2iajb = woVoV = tmp = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5 * t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5 * t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1, nvir), ft_ab.T, 1, t2new.reshape(-1, nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc, -1), -1, t2new.reshape(nocc, -1), 1)

    # Finish t1: remaining fvv/foo terms, then divide by the orbital
    # energy differences taken from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Row by row: fold the (j, i) block into (i, j), divide by the
    # denominators, and mirror the result back to (j, i).
    for i in range(nocc):
        if i > 0:
            t2new[i, :i] += t2new[:i, i].transpose(0, 2, 1)
            t2new[i, :i] /= lib.direct_sum('a,jb->jab', eia[i], eia[:i])
            t2new[:i, i] = t2new[i, :i].transpose(0, 2, 1)
        t2new[i, i] = t2new[i, i] + t2new[i, i].T
        t2new[i, i] /= lib.direct_sum('a,b->ab', eia[i], eia[i])

    time0 = log.timer_debug1('update t1 t2', *time0)
    #if hasattr(pyscf, 'MKL_NUM_THREADS'):
    #    pyscf._libmkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(1)))
    return t1new, t2new
def update_amps(cc, t1, t2, eris, blksize=1):
    """Compute new t1/t2 amplitudes from the current ones, block by block.

    The routine builds the one-particle intermediates ``foo``, ``fvv`` and
    ``fov`` and the ``woooo`` intermediate, then loops over slices
    ``[p0:p1)`` of the first occupied index.  Inside the loop it reads the
    matching slices of ``eris.ovvv``, ``eris.ooov``, ``eris.ovoo``,
    ``eris.ovov`` and ``eris.oovv`` and accumulates their contributions into
    ``t1new`` and ``t2new``.  After the loop, ``t2new`` is symmetrized into a
    lower-triangular pair list, ``cc.add_wvvVV_`` adds its contribution, and
    both amplitudes are divided by orbital-energy differences taken from the
    diagonal of ``eris.fock``.

    Comment lines starting with ``#:`` appear to give the ``numpy.einsum``
    expression that the following ``lib.dot`` calls are meant to reproduce.

    Args:
        cc: object providing ``stdout``, ``verbose``, ``nocc``, ``nmo`` and
            the method ``add_wvvVV_``.
        t1: singles amplitudes; presumably shape (nocc, nvir) -- TODO confirm
            against the caller.
        t2: doubles amplitudes, indexed here as ``t2[i, j, a, b]``.
        eris: integral container providing ``fock``, ``oooo``, ``ooov``,
            ``ovoo``, ``ovov``, ``oovv`` and ``ovvv`` (``ovvv`` is passed
            through ``unpack_tril`` before use).
        blksize: step used for every ``prange`` loop over occupied indices;
            it is also forwarded to ``cc.add_wvvVV_``.

    Returns:
        Tuple ``(t1new, t2new)``; ``t2new`` is a freshly allocated array of
        shape (nocc, nocc, nvir, nvir).
    """
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm which
    # interpreter versions this module has to support.
    time1 = time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc = cc.nocc
    nmo = cc.nmo
    nvir = nmo - nocc
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

#** make_inter_F
    fov = fock[:nocc,nocc:].copy()

    # foo/fvv start from the off-diagonal part of the Fock blocks (the
    # diagonal is zeroed here and re-enters through the denominators below).
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = numpy.asarray(eris.ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo = woooo.reshape(nocc,nocc,nocc,nocc) + numpy.asarray(eris.oooo)
    woooo = numpy.asarray(woooo.transpose(0,2,1,3), order='C')
    time1 = log.timer_debug1('woooo', *time0)

    for p0, p1 in prange(0, nocc, blksize):
# ==== read eris.ovvv ====
        eris_ovvv = numpy.asarray(eris.ovvv[p0:p1])
        eris_ovvv = unpack_tril(eris_ovvv.reshape((p1-p0)*nvir,-1))
        eris_ovvv = eris_ovvv.reshape(p1-p0,nvir,nvir,nvir)
        eris_ooov = numpy.asarray(eris.ooov[p0:p1])

        fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

        foo[:,p0:p1] += numpy.einsum('kc,jikc->ij', 2*t1, eris.ooov[p0:p1])
        foo[:,p0:p1] += numpy.einsum('kc,jkic->ij', -t1, eris.ooov[p0:p1])

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,ijbk->jiba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = eris_ovvv.transpose(1,2,0,3).reshape(nvir*nvir,-1)
        tmp = numpy.empty((nocc,nocc,p1-p0,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1)
            #: tmp[j0:j1] += numpy.einsum('ijcd,cdkb->ijkb', tau, eris_vvov)
            # Last argument 0: the slice of tmp is overwritten, not
            # accumulated (tmp comes from numpy.empty).
            lib.dot(tau.reshape(-1,nvir*nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
        #: t2new += numpy.einsum('ka,ijkb->jiba', -t1[p0:p1], tmp)
        tmp = numpy.asarray(tmp.transpose(1,0,3,2).reshape(-1,p1-p0), order='C')
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

    #: wovvo += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wovvo -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
        #: wovvo = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = numpy.asarray(eris.ovoo[p0:p1].transpose(2,0,1,3), order='C')
        wovvo = lib.dot(tmp.reshape(-1,nocc), t1, -1)
        wovvo = wovvo.reshape(nocc,p1-p0,nvir,nvir)
        #: wovvo += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wovvo.reshape(nocc,-1), 1)
        t2new[p0:p1] += wovvo.transpose(1,0,2,3)

        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(numpy.asarray(eris_ooov.transpose(0,1,3,2),
                                      order='C').reshape(-1,nocc), t1)
        woVoV = woVoV.reshape(p1-p0,nocc,nvir,nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir,-1), -1,
                    woVoV[i].reshape(nocc,-1), 1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        theta = make_theta(t2[p0:p1])
        #: t1new += numpy.einsum('jibc,jcba->ia', theta, eris_ovvv)
        lib.dot(theta.transpose(1,0,3,2).reshape(nocc,-1),
                eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

# ==== read eris.oOVv ====
        eris_oOVv = numpy.asarray(eris.ovov[p0:p1].transpose(0,2,3,1), order='C')
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1-p0):
            t2new[p0+i] += eris_oOVv[i].transpose(0,2,1) * .5

        fov[p0:p1] += numpy.einsum('kc,ikca->ia', t1, eris_oOVv) * 2
        fov[p0:p1] -= numpy.einsum('kc,ikac->ia', t1, eris_oOVv)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
    #: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
        # theta is still the make_theta(t2[p0:p1]) array built above.
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1),
                theta.reshape(-1,nvir), -1, t1new, 1)
        eris_ooov = None

    #: wovvo += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wovvo += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wovvo -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wovvo)
        theta = numpy.asarray(theta.transpose(1,2,3,0).reshape(nov,-1), order='C')
        wovvo = wovvo.transpose(0,3,2,1) + eris_oOVv.transpose(1,2,3,0)
        wovvo = numpy.asarray(wovvo, order='C')
        eris_OVvo = eris_oOVv.transpose(1,2,3,0).reshape(nov,-1)
        eris_OvVo = eris_oOVv.transpose(1,3,2,0).reshape(nov,-1)
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = numpy.asarray(t2[j0:j1].transpose(0,2,1,3), order='C')
            #: wovvo[j0:j1] -= .5 * numpy.einsum('icka,jkbc->jbai', eris_oOVv, t2)
            lib.dot(t2iajb.reshape(-1,nov), eris_OvVo,
                    -.5, wovvo[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            # tau reuses the t2iajb buffer and is modified in place below.
            tau = t2iajb
            for i in range(j1-j0):
                tau[i] *= 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wovvo[j0:j1] += .5 * numpy.einsum('ikca,jbkc->jbai', eris_oOVv, tau)
            lib.dot(tau.reshape(-1,nov), eris_OVvo,
                    .5, wovvo[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wovvo[j0:j1])
            tmp = lib.dot(wovvo[j0:j1].reshape((j1-j0)*nvir,-1), theta.T)
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,1,3)
            tau = tmp = None
        #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wovvo = eris_OvVo = eris_OVvo = None
        time2 = log.timer_debug1('wovvo [%d:%d]'%(p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, .5)
        theta = make_theta(tau)
        #: foo += numpy.einsum('kiab,kjab->ij', eris_oOVv, theta)
        #: fvv -= numpy.einsum('ijca,ijcb->ab', theta, eris_oOVv)
        for i in range(eris_oOVv.shape[0]):
            lib.dot(eris_oOVv[i].reshape(nocc,-1),
                    theta[i].reshape(nocc,-1).T, 1, foo, 1)
        lib.dot(theta.reshape(-1,nvir).T, eris_oOVv.reshape(-1,nvir),
                -1, fvv, 1)
        tau = theta = None

# ==== read eris.oovv ====
        eris_oovv = numpy.asarray(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #: tmp = numpy.einsum('ic,kjbc->kjib', t1, eris_oovv)
        #: tmp += numpy.einsum('ic,kjbc->kijb', t1, eris_oOVv)
        tmp = lib.dot(eris_oovv.reshape(-1,nvir), t1.T).reshape(-1,nocc,nvir,nocc)
        tmp = numpy.asarray(tmp.transpose(0,3,2,1), order='C')
        lib.dot(eris_oOVv.reshape(-1,nvir), t1.T, 1, tmp.reshape(-1,nocc), 1)
        tmp = numpy.asarray(tmp.transpose(1,3,2,0), order='C')
        #: t2new += numpy.einsum('ka,jibk->ijba', -t1[p0:p1], tmp)
        lib.dot(tmp.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', 2*t1, eris_oOVv)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1,3,0,2).copy()
        eris_oVOv = eris_oOVv.transpose(0,2,1,3).reshape(-1,nov)
        eris_oOvV = eris_oOVv.transpose(0,1,3,2).reshape(-1,nvir**2)
        #==== mem usage blksize*(nocc*nvir**2*4)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # Accumulates into woooo[p0:p1]; the trailing j0*nocc argument
            # presumably is the offset into the output -- TODO confirm
            # against lib.numpy_helper._dgemm.
            lib.numpy_helper._dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                                    eris_oOvV.reshape(-1,nvir*nvir),
                                    tau.reshape(-1,nvir*nvir),
                                    woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1,
                                    0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ikbc->jiab', tau, eris_oOVv)
            tau = tau.transpose(0,3,1,2).reshape(-1,nov)
            lib.dot(tau, eris_oVOv.T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_oovv = eris_oOVv = eris_oVOv = eris_oOvV = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = numpy.asarray(t2[p0:p1].transpose(0,2,1,3), order='C')
        t2ibja = numpy.asarray(t2[p0:p1].transpose(0,3,1,2), order='C')
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            tmp = lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1), t2ibja.reshape(-1,nov))
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,3,1)
            tmp = None

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            tmp = lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1), t2iajb.reshape(-1,nov))
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,1,3)
            tmp = None
        t2ibja = t2iajb = woVoV = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
# ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only pairs with j <= i are kept; they are packed row by row into
    # t2new_tril and the full t2new array is released.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij] = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, blksize)
    time1 = log.timer_debug1('vvvv', *time1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    # Divide each packed row i (pairs (i, j) with j <= i) by
    # eia[i, a] + eia[j, b].
    p0 = 0
    for i in range(nocc):
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular pair list back into a full (i, j, a, b) array;
    # the (j, i) block is the transpose of the (i, j) block.
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

#** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new /= eia
#** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
def update_amps(cc, t1, t2, eris, max_memory=2000):
    """Compute new t1/t2 amplitudes, choosing the block size from a memory budget.

    The routine builds the one-particle intermediates ``foo``, ``fvv`` and
    ``fov`` and the ``woooo`` intermediate, derives a block size from
    ``max_memory``, then loops over slices ``[p0:p1)`` of the first occupied
    index.  Inside the loop it reads the matching slices of ``eris.ovvv``,
    ``eris.ovoo``, ``eris.ooov``, ``eris.ovov`` and ``eris.oovv`` and
    accumulates their contributions into ``t1new`` and ``t2new``.  After the
    loop, ``t2new`` is symmetrized into a lower-triangular pair list,
    ``cc.add_wvvVV_`` adds its contribution, and both amplitudes are divided
    by orbital-energy differences taken from the diagonal of ``eris.fock``.

    Comment lines starting with ``#:`` appear to give the ``numpy.einsum``
    expression that the following ``lib.dot`` calls are meant to reproduce.

    Args:
        cc: object providing ``stdout``, ``verbose`` and ``add_wvvVV_``.
        t1: singles amplitudes of shape (nocc, nvir); both dimensions are
            read from ``t1.shape``.
        t2: doubles amplitudes, indexed here as ``t2[i, j, a, b]``.
        eris: integral container providing ``fock``, ``oooo``, ``ooov``,
            ``ovoo``, ``ovov``, ``oovv`` and ``ovvv`` (``ovvv`` is passed
            through ``_ccsd.unpack_tril`` before use).
        max_memory: memory budget; ``lib.current_memory()[0]`` is subtracted
            from it before the block size is computed, and the reduced value
            is forwarded to ``cc.add_wvvVV_``.  Presumably in MB -- TODO
            confirm.

    Returns:
        Tuple ``(t1new, t2new)``; ``t2new`` is a freshly allocated array of
        shape (nocc, nocc, nvir, nvir).
    """
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm which
    # interpreter versions this module has to support.
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

    #** make_inter_F
    fov = fock[:nocc, nocc:].copy()

    # foo/fvv start from the off-diagonal part of the Fock blocks (the
    # diagonal is zeroed here and re-enters through the denominators below).
    foo = fock[:nocc, :nocc].copy()
    foo[range(nocc), range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc, nocc:], t1)

    fvv = fock[nocc:, nocc:].copy()
    fvv[range(nvir), range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc, nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2 * t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape((nocc, ) * 4)
    woooo = lib.transpose_sum(woooo.reshape(nocc * nocc, -1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2, -1)
    woooo = _cp(woooo.reshape(nocc, nocc, nocc, nocc).transpose(0, 2, 1, 3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time0)

    unit = _memory_usage_inloop(nocc, nvir) * 1e6 / 8
    max_memory = max_memory - lib.current_memory()[0]
    # NOTE(review): blksize has a lower bound (BLKMIN) but is not capped by
    # nocc, while taubuf and tmp below are allocated with blksize as their
    # leading dimension -- confirm that oversized buffers are acceptable.
    blksize = max(BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks',
               blksize, nocc, int((nocc + blksize - 1) // blksize))
    for p0, p1 in prange(0, nocc, blksize):
        # ==== read eris.ovvv ====
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)

        fvv += numpy.einsum('kc,kcba->ab', 2 * t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: tmp = numpy.einsum('ijcd,kdcb->ijbk', tau, eris.ovvv)
        #: t2new += numpy.einsum('ka,ijbk->ijba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = _cp(
            eris_ovvv.transpose(2, 1, 0, 3).reshape(nvir * nvir, -1))
        tmp = numpy.empty((nocc, nocc, p1 - p0, nvir))
        # taubuf is a scratch buffer handed to make_tau through out=.
        taubuf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1 - j0])
            # Last argument 0: the slice of tmp is overwritten, not
            # accumulated (tmp comes from numpy.empty).
            lib.dot(tau.reshape(-1, nvir * nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1 - j0) * nocc, -1), 0)
        tmp = _cp(tmp.transpose(0, 1, 3, 2).reshape(-1, p1 - p0))
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1, nvir), 1)
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

        #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        #: t2new += woVoV.transpose()
        #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = _cp(eris.ovoo[p0:p1].transpose(2, 0, 1, 3))
        wOVov = lib.dot(tmp.reshape(-1, nocc), t1, -1)
        tmp = None
        wOVov = wOVov.reshape(nocc, p1 - p0, nvir, nvir)
        #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1, eris_ovvv.reshape(-1, nvir).T, 1,
                wOVov.reshape(nocc, -1), 1)
        t2new[p0:p1] += wOVov.transpose(1, 0, 2, 3)

        eris_ooov = _cp(eris.ooov[p0:p1])
        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(_cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)),
                        t1)
        woVoV = woVoV.reshape(p1 - p0, nocc, nvir, nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir, -1), -1,
                    woVoV[i].reshape(nocc, -1), 1)

        #: theta = t2.transpose(0,1,3,2) * 2 - t2
        #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        theta = numpy.empty((p1 - p0, nocc, nvir, nvir))
        for i in range(p1 - p0):
            theta[i] = t2[p0 + i].transpose(0, 2, 1) * 2
            theta[i] -= t2[p0 + i]
            lib.dot(_cp(theta[i].transpose(0, 2, 1).reshape(nocc, -1)),
                    eris_ovvv[i].reshape(-1, nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]' % (p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

        # ==== read eris.ovov ====
        eris_ovov = _cp(eris.ovov[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1 - p0):
            t2new[p0 + i] += eris_ovov[i].transpose(1, 0, 2) * .5

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        # theta is still the array filled in the loop above.
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(_cp(eris_ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)),
                theta.reshape(-1, nvir), -1, t1new, 1)
        eris_ooov = None

        #: wOVov += eris.ovov.transpose(0,1,3,2)
        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        theta = _cp(theta.transpose(0, 3, 1, 2))
        wOVov = _cp(wOVov.transpose(0, 3, 1, 2))
        eris_OVov = lib.transpose(eris_ovov.reshape(-1, nov)).reshape(
            nocc, nvir, -1, nvir)
        eris_OvoV = _cp(eris_OVov.transpose(0, 3, 2, 1))
        wOVov += eris_OVov
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = t2[j0:j1].transpose(0, 2, 1, 3).copy()
            #: wOVov[j0:j1] -= .5 * numpy.einsum('iack,jkbc->jbai', eris_ovov, t2)
            lib.dot(t2iajb.reshape(-1, nov), eris_OvoV.reshape(nov, -1),
                    -.5, wOVov[j0:j1].reshape((j1 - j0) * nvir, -1), 1)
            # tau takes over the t2iajb buffer and is modified in place.
            tau, t2iajb = t2iajb, None
            for i in range(j1 - j0):
                tau[i] *= 2
                tau[i] -= t2[j0 + i].transpose(2, 0, 1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0 + i] * 2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.dot(tau.reshape(-1, nov), eris_OVov.reshape(nov, -1),
                    .5, wOVov[j0:j1].reshape((j1 - j0) * nvir, -1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The same buffer is reused once more as the output of lib.dot.
            tmp, tau = tau, None
            lib.dot(wOVov[j0:j1].reshape((j1 - j0) * nvir, -1),
                    theta.reshape(-1, nov), 1, tmp.reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 0, 2)
            tmp = None
        #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wOVov = eris_OvoV = eris_OVov = None
        time2 = log.timer_debug1('wOVov [%d:%d]' % (p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        for i in range(p1 - p0):
            tau = numpy.einsum('a,jb->jab', t1[p0 + i] * .5, t1)
            tau += t2[p0 + i]
            theta = tau.transpose(0, 2, 1) * 2
            theta -= tau
            lib.dot(
                _cp(eris_ovov[i].transpose(1, 2, 0)).reshape(nocc, -1),
                theta.reshape(nocc, -1).T, 1, foo, 1)
            lib.dot(
                theta.reshape(-1, nvir).T,
                eris_ovov[i].reshape(nvir, -1).T, -1, fvv, 1)
        tau = theta = None

        # ==== read eris.oovv ====
        eris_oovv = _cp(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiab', -t1, tmp)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1 - p0):
            tmp = lib.dot(t1, eris_oovv[j].reshape(-1, nvir).T)
            tmp = _cp(tmp.reshape(nocc, nocc, nvir).transpose(0, 2, 1))
            t2new[p0 + j] += lib.dot(tmp.reshape(-1, nocc), t1,
                                     -1).reshape(nocc, nvir,
                                                 nvir).transpose(0, 2, 1)
            # tmp is reused as the output buffer of the next product.
            lib.dot(t1, eris_ovov[j].reshape(-1, nvir).T, 1,
                    tmp.reshape(nocc, -1))
            lib.dot(tmp.reshape(-1, nocc), t1, -1,
                    t2new[p0 + j].reshape(-1, nvir), 1)
        tmp = None

        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2 * t1, eris_ovov)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: woVoV -= eris.oovv
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
        #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1, 3, 0, 2).copy()
        eris_oVOv = _cp(eris_ovov.transpose(0, 3, 2, 1))
        eris_oOvV = _cp(eris_ovov.transpose(0, 2, 1, 3))
        #==== mem usage blksize*(nocc*nvir**2*4)

        taubuf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1 - j0])
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # Accumulates into woooo[p0:p1]; the trailing j0 * nocc argument
            # presumably is the offset into the output -- TODO confirm
            # against lib.numpy_helper._dgemm.
            lib.numpy_helper._dgemm('N', 'T', (p1 - p0) * nocc,
                                    (j1 - j0) * nocc, nvir * nvir,
                                    eris_oOvV.reshape(-1, nvir * nvir),
                                    tau.reshape(-1, nvir * nvir),
                                    woooo[p0:p1].reshape(-1, nocc * nocc),
                                    1, 1, 0, 0, j0 * nocc)
            for i in range(j1 - j0):
                tau[i] -= t2[j0 + i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.dot(_cp(tau.transpose(0, 3, 1, 2).reshape(-1, nov)),
                    eris_oVOv.reshape(-1, nov).T, 1,
                    woVoV[j0:j1].reshape(
                        (j1 - j0) * nvir, -1), 1)
        #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]' % (p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=taubuf[:p1 - p0])
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1, nocc * nocc).T,
                tau.reshape(-1, nvir * nvir), .5,
                t2new.reshape(nocc * nocc, -1), 1)
        eris_oovv = eris_ovov = eris_oVOv = eris_oOvV = taubuf = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = _cp(t2[p0:p1].transpose(0, 2, 1, 3))
        t2ibja = _cp(t2[p0:p1].transpose(0, 3, 1, 2))
        # tmp is a scratch output buffer shared by both products below.
        tmp = numpy.empty((blksize, nvir, nocc, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.dot(woVoV[j0:j1].reshape((j1 - j0) * nvir, -1),
                    t2ibja.reshape(-1, nov), 1,
                    tmp[:j1 - j0].reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 2, 0)

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            lib.dot(woVoV[j0:j1].reshape((j1 - j0) * nvir, -1),
                    t2iajb.reshape(-1, nov), 1,
                    tmp[:j1 - j0].reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 0, 2)
        t2ibja = t2iajb = woVoV = tmp = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]' % (p0, p1), *time1)
    # ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5 * t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5 * t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1, nvir), ft_ab.T, 1, t2new.reshape(-1, nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc, -1), -1, t2new.reshape(nocc, -1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only pairs with j <= i are kept; they are packed row by row into
    # t2new_tril and the full t2new array is released.
    t2new_tril = numpy.empty((nocc * (nocc + 1) // 2, nvir, nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i + 1):
            t2new_tril[ij] = t2new[i, j]
            t2new_tril[ij] += t2new[j, i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, max_memory)
    time1 = log.timer_debug1('vvvv', *time1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    # Divide each packed row i (pairs (i, j) with j <= i) by the denominator
    # that lib.direct_sum builds from eia[i] and eia[:i + 1].
    p0 = 0
    for i in range(nocc):
        t2new_tril[p0:p0 + i + 1] /= lib.direct_sum('a,jb->jab', eia[i],
                                                    eia[:i + 1])
        p0 += i + 1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular pair list back into a full (i, j, a, b) array;
    # the (j, i) block is the transpose of the (i, j) block.
    t2new = numpy.empty((nocc, nocc, nvir, nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i, j] = t2new_tril[ij]
            t2new[j, i] = t2new_tril[ij].T
            ij += 1
        t2new[i, i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

    #** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    t1new /= eia
    #** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
def make_hdiag(h1e, eri, norb, nelec):
    """Return the flattened diagonal from direct_spin1, symmetrized.

    The vector produced by ``direct_spin1.make_hdiag`` is viewed as a square
    (na, na) matrix, averaged with its transpose to reduce numerical error,
    and flattened again.

    Args:
        h1e, eri, norb, nelec: forwarded unchanged to
            ``direct_spin1.make_hdiag``.

    Returns:
        1-D array with the same number of elements as the unsymmetrized
        diagonal.
    """
    diag = direct_spin1.make_hdiag(h1e, eri, norb, nelec)
    # The diagonal is assumed to hold na*na entries, as the reshape requires.
    dim = int(numpy.sqrt(diag.size))
    square = diag.reshape(dim, dim)
    # transpose_sum(..., inplace=True) stores square + square.T back into
    # the same buffer; halving it gives the symmetric average.
    summed = lib.transpose_sum(square, inplace=True)
    return (summed * .5).ravel()
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density intermediates with integrals into Ioo, Ivv, Ivo, Xvo.

    Each of the four output matrices is accumulated from products of the
    one-particle (``doo``, ``dvv``) and two-particle (``d2``) density
    intermediates with blocks of ``eris``.  At the end ``Ioo``, ``Ivv`` and
    ``Ivo`` are multiplied by -1 and the negated ``Ivo`` is added to ``Xvo``.

    Comment lines starting with ``#:`` appear to give the ``numpy.einsum``
    expression that the following ``lib.dot`` calls are meant to reproduce.

    Args:
        mycc: object providing ``stdout`` and ``verbose``; it is also passed
            to ``ccsd._ERIS`` and the ``ccsd_rdm`` helpers when defaults are
            built.
        t1: array whose shape supplies (nocc, nvir).
        t2, l1, l2: only forwarded to the ``ccsd_rdm`` helpers when ``d1`` or
            ``d2`` is None.
        eris: integral container providing ``oooo``, ``ooov``, ``oovv``,
            ``ovov``, ``ovoo``, ``ovvv`` and ``vvvv``; built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: pair ``(doo, dvv)``; computed with
            ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; computed with ``ccsd_rdm.gamma2_incore`` when None.
            ``dvvov`` is unpacked but not used in this function.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes (nocc, nocc),
        (nvir, nvir), (nvir, nocc) and (nvir, nocc).
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        doo, dvv = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    else:
        doo, dvv = d1
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc,nocc))
    Ivv = numpy.zeros((nvir,nvir))
    Ivo = numpy.zeros((nvir,nocc))
    Xvo = numpy.zeros((nvir,nocc))

    # ---- contributions from doooo and doo ----
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1,0,2,3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc,-1), d_oooo.reshape(nocc,-1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0,2,3,1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1,nvir).T, d_oooo.reshape(nocc,-1).T, 2)
    Xvo +=(numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4
         - numpy.einsum('kj,ikja->ai', doo+doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # ---- contributions from dooov ----
    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1,nvir).T, d_ooov.reshape(-1,nvir))
    Ivo += lib.dot(d_ooov.reshape(-1,nvir).T, eris_oooo.reshape(-1,nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0,1,3,2).reshape(-1,nocc))
    # tmpooov is kept alive because the doovv section below reuses it.
    tmpooov = _cp(eris_ooov.transpose(0,1,3,2))
    Ioo += lib.dot(tmpooov.reshape(-1,nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1,nvir).T, tmp)
    eris_oooo = tmp = None

    d_ooov = d_ooov + d_ooov.transpose(1,0,2,3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc,-1), d_ooov.reshape(nocc,-1).T)
    Xvo += lib.dot(eris_ovov.reshape(-1,nvir).T,
                   _cp(d_ooov.transpose(0,2,3,1).reshape(nocc,-1)).T)
    d_ooov = None

    # ---- contributions from doovv ----
    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1,0,3,2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1), d_oovv[i].reshape(nocc,-1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1,nvir).T, d_oovv.reshape(-1,nvir))
    Ivo += lib.dot(d_oovv.reshape(-1,nvir).T, tmpooov.reshape(-1,nocc))
    # From here on d_oovv holds the output of _ccsd.precontract, reshaped to
    # (nocc, nocc, -1); it is consumed together with eris.ovvv below.
    d_oovv = _ccsd.precontract(d_oovv.reshape(-1,nvir,nvir)).reshape(nocc,nocc,-1)
    eris_ooov = tmpooov = None

    # ---- contributions from dovov + dovvo ----
    blksize = 4
    d_ovov = numpy.empty((nocc,nvir,nocc,nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0,p1):
            d_ovov[i] += d_ovvo[i-p0].transpose(0,2,1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov,nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1,nvir).T, _cp(eris.ovoo).reshape(-1,nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc,-1), d_ovov.reshape(nocc,-1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1,nvir).T, d_ovov.reshape(-1,nvir))

    # ---- contributions from dovvv, blocked over the occupied index ----
    nvir_pair = nvir * (nvir+1) // 2
    bufe_ovvv = numpy.empty((blksize,nvir,nvir,nvir))
    bufc_ovvv = numpy.empty((blksize,nvir,nvir_pair))
    # This assignment is intended to make bufc_ovvv share bufe_ovvv's memory,
    # so c_ovvv below must be fully consumed before eris_ovvv is unpacked
    # into the same storage.
    # NOTE(review): assigning to ndarray.data appears to be deprecated in
    # recent NumPy releases -- confirm against the supported versions.
    bufc_ovvv.data = bufe_ovvv.data
    c_vvvo = numpy.empty((nvir_pair,nvir,nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1-p0,nvir,nvir,nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1-p0):
            lib.dot(dovvv[p0+i].reshape(nvir,-1),
                    eris_oovv[p0+i].reshape(nocc,-1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1-p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1,nvir,nvir), out=c_ovvv)
        # Transposed copy into c_vvvo, done in tiles of at most BLKSIZE rows.
        for i0, i1, in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE//(p1-p0)+1):
                c_vvvo[i0:i1,j0:j1,p0:p1] = c_ovvv[:,j0:j1,i0:i1].transpose(2,1,0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1-p0):
            lib.dot(eris_ovx[i].reshape(nvir,-1),
                    d_oovv[p0+i].reshape(nocc,-1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir,-1),
                    c_ovvv[i].reshape(nvir,-1).T, 1, Ivv, 1)

        eris_ovvv = bufe_ovvv[:p1-p0]
        _ccsd.unpack_tril(eris_ovx.reshape(-1,nvir_pair),
                          out=eris_ovvv.reshape(-1,nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0,1,3,2))
        lib.dot(eris_ovvv.reshape(-1,nvir).T, d_ovvo.reshape(-1,nocc), 1, Xvo, 1)

        # The d_ovvo buffer is recycled to hold transposed eris_ovov slices.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1-p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0+i])
            e_ovvo[i] = eris_ovov[p0+i].transpose(0,2,1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(d_ovvv.reshape(-1,nvir).T,
                e_ovvo[:p1-p0].reshape(-1,nocc), 1, Ivo, 1)
        lib.dot(eris_ovvv.reshape(-1,nvir).T, d_ovvv.reshape(-1,nvir), 1, Ivv, 1)

        Xvo[:,p0:p1] +=(numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4
                      - numpy.einsum('cb,icba->ai', dvv+dvv.T, eris_ovvv))

    d_oovv = d_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    # ---- contributions from dvvvv, blocked over the virtual index ----
    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize*nvir,nvir,nocc))
    bufe_vvvv = numpy.empty((blksize*nvir,nvir,nvir))
    bufd_vvvv = numpy.empty((blksize*nvir,nvir,nvir))
    for p0, p1 in prange(0, nvir, blksize):
        # off0:off1 is the range of packed lower-triangular pair indices
        # whose first index lies in [p0, p1).
        off0 = p0*(p0+1)//2
        off1 = p1*(p1+1)//2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Halve the entries that sit on the diagonal of the packed pair index.
        for i in range(p0, p1):
            d_vvvv[i*(i+1)//2+i-off0] *= .5
        d_vvvv = _ccsd.unpack_tril(d_vvvv, out=bufd_vvvv[:off1-off0])
        eris_vvvv = _ccsd.unpack_tril(eris.vvvv[off0:off1],
                                      out=bufe_vvvv[:off1-off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1,nvir).T, d_vvvv.reshape(-1,nvir), 2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(eris_vvvv.reshape(-1,nvir).T, d_vvvo.reshape(-1,nocc), 1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1-off0]
        # Transposed copy of the packed eris_ovvv columns, done in tiles.
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1, in prange(0, nvir, BLKSIZE//nocc+1):
                eris_vvvo[i0-off0:i1-off0,j0:j1,:] = eris_ovvv[:,j0:j1,i0:i1].transpose(2,1,0)
        lib.dot(eris_vvvo.reshape(-1,nocc).T, d_vvvo.reshape(-1,nocc), 1, Ioo, 1)
        lib.dot(d_vvvv.reshape(-1,nvir).T, eris_vvvo.reshape(-1,nocc), 2, Ivo, 1)

    # Flip the sign of the I matrices; Xvo then receives the negated Ivo.
    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def update_amps(mycc, t1, t2, eris):
    """Compute updated t1 and t2 amplitudes from the current ones.

    New amplitude arrays are zero-initialised with the shapes of ``t1`` and
    ``t2``, filled by a sequence of blocked matrix products (each one is
    preceded by a ``#:`` comment giving the equivalent einsum expression),
    and finally divided by orbital-energy differences taken from the
    diagonal of ``eris.fock``.

    Args:
        mycc: driver object.  Attributes read here: ``stdout``, ``verbose``,
            ``max_memory``, ``direct``; method called: ``add_wvvVV_``.
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2: array indexed as ``t2[i, j]`` giving (nvir, nvir) blocks;
            presumably of shape (nocc, nocc, nvir, nvir) -- TODO confirm.
        eris: integral container.  Attributes read here: ``fock``, ``oooo``,
            ``ooov``, ``ovoo``, ``oovv``, ``ovov`` and ``ovvv``.  ``ovvv`` is
            read with a packed last axis of length nvir*(nvir+1)//2 and is
            expanded with ``lib.unpack_tril``.

    Returns:
        Tuple ``(t1new, t2new)``.
    """
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm which
    # interpreter versions this module has to support.
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    # add_wvvVV_ fills a buffer indexed by the packed pair index
    # i*(i+1)//2+j (j <= i); it is scattered into the i >= j blocks of t2new.
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] = t2new_tril[i*(i+1)//2+j]
        # Diagonal blocks are halved because the final symmetrisation
        # (t2new[i,j] += t2new[j,i].T) adds the i == j block to itself.
        t2new[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    # Occupied-occupied Fock block with its diagonal zeroed.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    # Virtual-virtual Fock block with its diagonal zeroed.
    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    # Block sizes over the occupied index (blksize) and over the first
    # virtual index of ovvv (blknvir), derived from the memory budget.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2

    # Loader callbacks handed to async_do / lib.background_thread below.
    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        # Read the virtual block that follows [q0:q1]; nothing is read when
        # the current block is already the last one.
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]

    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]

    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    # Five scratch rows shared by every stage of the loop.  The arrays
    # created with numpy.ndarray(..., buffer=bufN) are views on these rows,
    # so two names bound to the same row alias each other and the order of
    # the statements below matters.
    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
        #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            # Double buffering: the first block is read synchronously, later
            # blocks are taken from the buffer filled by the previous
            # iteration's prefect_ovvv call.
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                readbuf, prefetchbuf = prefetchbuf, readbuf
            # presumably waits for the previous handler and starts
            # prefect_ovvv in the background -- confirm against async_do
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            # This term is skipped entirely when mycc.direct is set.
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                # tmp1 reuses buf2, so it may only be created once the last
                # tau block has been consumed.
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            tmp = t1[:,q0:q1].copy()
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1))

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            # theta overwrites tmp (buf1); tmp was already copied into wOoVv.
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        # Wait for the last background read before dropping its buffers.
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        # eris.oovv[p0:p1] is loaded in the background while the ooov term
        # is contracted.
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        handler.join()
        # eris.ovov[p0:p1] is loaded in the background while the oovv terms
        # are contracted.
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            # tau's storage is no longer needed and is reused for vov.
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = vov = None

        #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1),
                     tmp.reshape(-1,nvir), -1, t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

        #: wOVov += eris.ovov
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        # buf3 (until now the storage of wOoVv) is overwritten here; wOoVv
        # has already been transposed into buf5.
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i] = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The product is written into tau's storage (buf2).
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        # woVoV is written into buf4, the storage of wooVV; tmp (buf1) holds
        # the reordered copy and _cp(tmp) is the input of the transpose.
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov), out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1), out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir), axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T, 1,
                     woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir), .5,
                 t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    # Denominators: eia[i, a] = fock diagonal (occupied i) minus fock
    # diagonal (virtual a).
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the i >= j blocks are symmetrised and divided; the j,i block is
    # then set to the transpose of the finished i,j block.
    # NOTE(review): `ij` is incremented but never read in this function.
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i] = t2new[i,j].T
            ij += 1

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Back-transform two-particle density intermediates from MO to AO basis.

    The result is stored as a square ``(nao_pair, nao_pair)`` dataset named
    ``'dm2'`` with ``nao_pair = nao*(nao+1)//2``.  The work is done in three
    passes that stream through a temporary HDF5 swap file so that only
    ``blksize`` rows are held in memory at a time.

    Args:
        mycc: driver object; ``stdout``, ``verbose`` and ``max_memory`` are
            read here.
        d2: sequence unpacked as ``(dovov, dvvvv, doooo, doovv, dovvo,
            dvvov, dovvv, dooov)``.  ``dvvov`` is not used in this function.
        mo_coeff: orbital coefficients of shape ``(nao, nmo)``; converted to
            Fortran order before being handed to the C driver.
        fsave: optional open HDF5 file (or compatible object providing
            ``create_dataset``) that receives the ``'dm2'`` dataset.  When
            omitted a temporary file is created.

    Returns:
        If ``fsave`` was not supplied, the content of the ``'dm2'`` dataset
        read into memory; otherwise ``fsave`` itself.
    """
    # Reference in-core implementation of what the blocked code computes:
    # dm2 = ccsd_rdm._make_rdm2(mycc, None, d2, with_dm1=False)
    # dm2 = numpy.einsum('pi,ijkl->pjkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pj,ijkl->ipkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pk,ijkl->ijpl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pl,ijkl->ijkp', mo_coeff, dm2)
    # dm2 = dm2 + dm2.transpose(1,0,2,3)
    # dm2 = dm2 + dm2.transpose(0,1,3,2)
    # return ao2mo.restore(4, dm2*.5, nmo)
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # NOTE(review): time.clock() was removed in Python 3.8 -- left untouched
    # because the timer helpers of the logger must change together with it.
    time1 = time.clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = dovov.shape[:2]
    mo_coeff = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()

    def _trans(vin, orbs_slice, out=None):
        # Transform the trailing two orbital indices of every row of `vin`
        # through the C driver; the output has nao_pair columns per row.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int * 4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # Pass 1: transform the vvvv block and store it transposed in the swap
    # file as dataset 'v' of shape (nao_pair, nvir_pair).
    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nvir_pair, max(ccsd.BLKMIN, blksize))
    chunks_vv = (int(min(blksize, 4e8 / blksize)), blksize)
    fswap.create_dataset('v', (nao_pair, nvir_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        fswap['v'][:, p0:p1] = _trans(lib.unpack_tril(_cp(dvvvv[p0:p1])),
                                      (nocc, nmo, nocc, nmo)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # Pass 2:
    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    fswap.create_dataset('o', (nmo, nocc, nao_pair), 'f8',
                         chunks=(nocc, nocc, blksize))
    buf1 = numpy.zeros((nocc, nocc, nmo, nmo))
    buf1[:, :, :nocc, :nocc] = doooo
    buf1[:, :, nocc:, nocc:] = _cp(doovv)
    buf1 = _trans(buf1.reshape(nocc**2, -1), (0, nmo, 0, nmo))
    fswap['o'][:nocc] = buf1.reshape(nocc, nocc, nao_pair)
    dovoo = numpy.asarray(dooov).transpose(2, 3, 0, 1)
    # Rows nocc..nmo of 'o' are filled in slabs of at most nocc rows.
    for p0, p1 in lib.prange(nocc, nmo, nocc):
        buf1 = numpy.zeros((nocc, p1 - p0, nmo, nmo))
        buf1[:, :, :nocc, :nocc] = dovoo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, :nocc] = dovvo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, :nocc, nocc:] = dovov[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, nocc:] = dovvv[:, p0 - nocc:p1 - nocc]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * nocc, -1)
        buf1 = _trans(buf1, (0, nmo, 0, nmo))
        fswap['o'][p0:p1] = buf1.reshape(p1 - p0, nocc, nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # Pass 3:
    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    gsave = fsave.create_dataset('dm2', (nao_pair, nao_pair), 'f8',
                                 chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmo, nmo))
        buf1[:, nocc:, nocc:] = lib.unpack_tril(_cp(fswap['v'][p0:p1]))
        buf1[:, :, :nocc] = fswap['o'][:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, (0, nmo, 0, nmo))
        if p0 > 0:
            # Combine this row block with the columns [p0:p1] of the rows
            # written by earlier iterations, and write the sum back to both.
            buf1 = _cp(gsave[:p0, p0:p1])
            buf1[:p0, :p1 - p0] += buf2[:p1 - p0, :p0].T
            buf2[:p1 - p0, :p0] = buf1[:p0, :p1 - p0].T
            gsave[:p0, p0:p1] = buf1
        # Symmetrise the diagonal block in place.
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        gsave[p0:p1] = buf2
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # Dataset.value was removed in h5py 3.0; indexing with an empty
        # tuple reads the whole dataset on every h5py version.
        return fsave['dm2'][()]
    else:
        return fsave
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the Ioo, Ivv, Ivo and Xvo intermediates.

    Density intermediates (``d1``, ``d2``) are contracted with the integral
    blocks of ``eris``.  Each BLAS-style ``lib.dot`` call is preceded by a
    ``#:`` comment giving the equivalent einsum expression.  Large
    three-virtual-index quantities are staged through a temporary HDF5 swap
    file between the two blocked passes.

    Args:
        mycc: driver object; ``stdout``, ``verbose`` and ``max_memory`` are
            read here.
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2, l1, l2: only forwarded to ``ccsd_rdm.gamma1_intermediates`` /
            ``ccsd_rdm.gamma2_outcore`` when ``d1`` / ``d2`` are missing.
        eris: integral container; built with ``ccsd._ERIS(mycc)`` if None.
        d1: tuple ``(doo, dov, dvo, dvv)``; computed if None.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; if None it is generated into a temporary HDF5 file
            that is emptied and closed before returning.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` of arrays with shapes
        ``(nocc, nocc)``, ``(nvir, nvir)``, ``(nvir, nocc)`` and
        ``(nvir, nocc)``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Swap file: 'c_vvov' / 'e_vvov' hold transposed density / integral
    # blocks written in pass 1 and read back in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1, nvir).T,
                   d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # d_ovov is assembled in memory in small occupied slabs.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1, nvir).T,
                   _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Parked on disk; read back block by block at the end of pass 1.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # ---- pass 1: blocks over the occupied index -------------------------
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T,
                       d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T,
                       eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T,
                       d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        # From here on d_oovv holds the packed form returned by
        # _ccsd.precontract (matches the packed last axis of eris.ovvv).
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T,
                       eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T,
                       d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
            numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc),
                1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # ---- pass 2: blocks over the virtual index --------------------------
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # This loop runs over nvir, so the message reports nvir and the number
    # of virtual blocks (it previously reported the occupied counts).
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of virtual rows p0 and p1 in the packed pair index.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Rows with equal pair indices (a == b) are scaled by one half.
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir),
                2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc),
                1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc),
                2, Ivo, 1)
        lib.dot(eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc),
                1, Ioo, 1)
        # Drop every per-block array before the next block is allocated
        # (the original cleared a misspelt, never-used name here and left
        # eris_vvvo / d_vvvo alive).
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    # Empty and close the swap file; dropping the NamedTemporaryFile
    # reference lets the file object be reclaimed.
    del fswap['e_vvov']
    del fswap['c_vvov']
    del fswap['dovvo']
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # The d2 file was created by this function, so clean it up as well.
        for key in fd2intermediate.keys():
            del fd2intermediate[key]
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def get_eri(mydf, kpts=None, compact=True):
    """Assemble two-electron integrals for a set of four k-points.

    The starting value comes from ``pwdf_ao2mo.get_eri``; contributions
    yielded by ``mydf.sr_loop`` are then accumulated onto it.  Which code
    path is taken depends on the four k-points returned by
    ``_format_kpts(kpts)``.

    Args:
        mydf: density-fitting object.  ``build()`` is called first when
            ``mydf._cderi`` is None; ``cell``, ``max_memory`` and
            ``sr_loop`` are used afterwards.
        kpts: k-point specification passed to ``_format_kpts``, which yields
            the four k-points (i, j, k, l).
        compact: only consulted in the gamma-point branch.  When false the
            result is expanded with ``ao2mo.restore(1, ...)`` and reshaped
            to ``(nao**2, -1)``.

    Returns:
        The accumulated integral array ``eri``.
    """
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    # The literal compact=True (not the `compact` argument) is passed here;
    # the gamma-point branch expands the result itself when needed.
    eri = pwdf_ao2mo.get_eri(mydf, kptijkl, compact=True)
    nao = cell.nao_nr()
    # Memory budget for sr_loop: what is left after reserving nao**4 doubles,
    # scaled by 0.8, but never below 2000.
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0] - nao**4*8/1e6) * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        eri *= .5  # because we'll do +cc later
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eri, 1)
            LpqR = LpqI = j3cR = j3cI = None
        # eri + eri.T restores the halved starting value and adds the
        # transposed partner of the accumulated term.
        eri = lib.transpose_sum(eri, inplace=True)
        if not compact:
            eri = ao2mo.restore(1, eri, nao).reshape(nao**2,-1)
        return eri

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        # Real and imaginary parts are accumulated in separate real arrays.
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            # eri == eri.transpose(3,2,1,0).conj()
            # The second product below is not evaluated; it is recovered
            # after the loop from the symmetry stated above.
            #zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
            LpqR = LpqI = j3cR = j3cI = None
        # eri == eri.transpose(3,2,1,0).conj()
        # Real part: add the transpose.  Imaginary part: subtract it.
        eriR = lib.transpose_sum(eriR, inplace=True)
        buf = lib.transpose(eriI)
        eriI -= buf

        # `buf` is reused as the output of both index-swapping transposes,
        # so each result must be added to eri before buf is overwritten.
        eriR = lib.transpose(eriR.reshape(-1,nao,nao), axes=(0,2,1), out=buf)
        eri += eriR.reshape(eri.shape)
        eriI = lib.transpose(eriI.reshape(-1,nao,nao), axes=(0,2,1), out=buf)
        eri += eriI.reshape(eri.shape)*1j
        return eri

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        # Two sr_loop generators run in lockstep, so each gets half the
        # memory budget.
        max_memory *= .5
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None
        eri += eriR
        eri += eriI*1j
        return eri