def tran_rdm2(t1, t2):
    """Build a 4-index array from ``t1``/``t2``, one occupied index at a time.

    For every leading index ``p0`` a single-row ``tau`` is produced by
    ``_ccsd.make_tau`` (written into a reusable buffer), turned into
    ``theta = 2*tau - tau`` with its last two axes swapped, and stored in row
    ``p0`` of the result.  The result is then reordered from ``ijab`` to
    ``iajb`` and scaled by 2.

    Args:
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2: array sliced as ``t2[p0:p1]`` and handed to ``_ccsd.make_tau``
            -- presumably shaped ``(nocc, nocc, nvir, nvir)``; TODO confirm.

    Returns:
        A new array of shape ``(nocc, nvir, nocc, nvir)``.
    """
    # Imported once per call; the import used to be re-executed on every
    # loop iteration although it is loop-invariant.
    from pyscf.cc import _ccsd

    nocc, nvir = t1.shape
    tau = numpy.empty((1, nocc, nvir, nvir))  # scratch row reused by make_tau
    rdm2 = numpy.empty((nocc, nocc, nvir, nvir))
    for p0 in range(nocc):
        p1 = p0 + 1
        _ccsd.make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=tau)
        theta = tau * 2 - tau.transpose(0, 1, 3, 2)
        rdm2[p0, :, :, :] = theta[0, :, :, :]
    # rdm2 as iajb (same axis permutation as einsum 'ijab->iajb')
    rdm2 = rdm2.transpose(0, 2, 1, 3)
    return rdm2 * 2.0
def update_amps(mycc, t1, t2, l1, l2, eris=None, saved=None):
    """Compute one update of the ``l1``/``l2`` arrays from the current values.

    NOTE(review): the names suggest these are CCSD Lambda-equation amplitudes
    -- confirm against the callers.  The ``#:`` comments give the einsum each
    ``lib.dot`` call below is meant to reproduce.

    Args:
        mycc: object providing ``stdout``, ``verbose``, ``max_memory`` and
            ``add_wvvVV``.
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2: 4-index array -- presumably ``(nocc, nocc, nvir, nvir)``.
        l1: array with the same shape as ``t1``.
        l2: 4-index array, same layout as the returned ``l2new``.
        eris: integral container; ``fock``, ``ooov``, ``ovvv``, ``ovov`` and
            ``oovv`` are read.  Although the default is ``None`` it is
            dereferenced unconditionally, so callers must pass it.
        saved: intermediates object (``w1``..``w4``, ``wooov``, ``wovvv``,
            ``wOvOv``, ``wOVov``, ``woooo``).  Built with
            ``make_intermediates`` when ``None``.

    Returns:
        Tuple ``(l1new, l2new)`` with shapes ``(nocc, nvir)`` and
        ``(nocc, nocc, nvir, nvir)``.
    """
    if saved is None:
        saved = make_intermediates(mycc, t1, t2, eris)
    # time.clock() was removed in Python 3.8; use process_time() when it
    # exists and only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = time0 = cpu_clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fov = eris.fock[:nocc, nocc:]

    #:mba = numpy.einsum('klca,klcb->ba', l2, t2*2-t2.transpose(0,1,3,2))
    #:mij = numpy.einsum('ikcd,jkcd->ij', l2, t2*2-t2.transpose(0,1,3,2))
    #:theta = t2*2 - t2.transpose(0,1,3,2)
    theta = _ccsd.make_0132(t2, t2, 2, -1)
    mba = lib.dot(theta.reshape(-1, nvir).T, l2.reshape(-1, nvir))
    mij = lib.dot(l2.reshape(nocc, -1), theta.reshape(nocc, -1).T)
    theta = None
    mba1 = numpy.einsum("jc,jb->bc", l1, t1) + mba
    mij1 = numpy.einsum("kb,jb->kj", l1, t1) + mij
    mia1 = (
        t1
        + numpy.einsum("kc,jkbc->jb", l1, t2) * 2
        - numpy.einsum("kc,jkcb->jb", l1, t2)
        - reduce(numpy.dot, (t1, l1.T, t1))
        - numpy.einsum("bd,jd->jb", mba, t1)
        - numpy.einsum("lj,lb->jb", mij, t1)
    )

    tmp = mycc.add_wvvVV(numpy.zeros_like(l1), l2, eris)
    l2new = numpy.empty((nocc, nocc, nvir, nvir))
    # tmp is indexed by a running pair counter over i >= j; expand it into
    # the full (i, j) and (j, i) blocks of l2new.
    ij = 0
    for i in range(nocc):
        for j in range(i):
            tmp1 = tmp[ij] * 0.5  # *.5 because of l2+l2.transpose(1,0,3,2) later
            l2new[i, j] = tmp1
            l2new[j, i] = tmp1.T
            ij += 1
        l2new[i, i] = tmp[ij] * 0.5
        ij += 1
    l1new = (numpy.einsum("ijab,jb->ia", l2new, t1) * 4
             - numpy.einsum("jiab,jb->ia", l2new, t1) * 2)
    tmp = tmp1 = None

    l1new += fov
    l1new += numpy.einsum("ib,ba->ia", l1, saved.w1)
    l1new -= numpy.einsum("ja,ij->ia", l1, saved.w2)
    l1new -= numpy.einsum("ik,ka->ia", mij, saved.w4)
    l1new -= numpy.einsum("ca,ic->ia", mba, saved.w4)
    l1new += numpy.einsum("ijab,bj->ia", l2, saved.w3) * 2
    l1new -= numpy.einsum("ijba,bj->ia", l2, saved.w3)

    l2new += numpy.einsum("ia,jb->ijab", l1, saved.w4)
    #:l2new += numpy.einsum('jibc,ca->jiba', l2, saved.w1)
    #:l2new -= numpy.einsum('kiba,jk->jiba', l2, saved.w2)
    lib.dot(l2.reshape(-1, nvir), saved.w1, 1, l2new.reshape(-1, nvir), 1)
    lib.dot(saved.w2, l2.reshape(nocc, -1), -1, l2new.reshape(nocc, -1), 1)

    eris_ooov = _cp(eris.ooov)
    l1new -= numpy.einsum("jkia,kj->ia", eris_ooov, mij1) * 2
    l1new += numpy.einsum("ikja,kj->ia", eris_ooov, mij1)
    #:l2new -= numpy.einsum('ka,kijb->jiba', l1, eris_ooov)
    lib.dot(_cp(eris_ooov.transpose(0, 2, 1, 3).reshape(nocc, -1)).T,
            l1, -1, l2new.reshape(-1, nvir), 1)
    eris_ooov = None

    tau = _ccsd.make_tau(t2, t1, t1)
    #:l2tau = numpy.einsum('ijcd,klcd->ijkl', l2, tau)
    l2tau = lib.dot(l2.reshape(nocc ** 2, -1),
                    tau.reshape(nocc ** 2, -1).T).reshape((nocc,) * 4)
    tau = None
    #:l2t1 = numpy.einsum('jidc,kc->ijkd', l2, t1)
    l2t1 = lib.dot(l2.reshape(-1, nvir), t1.T).reshape(nocc, nocc, nvir, nocc)
    l2t1 = _cp(l2t1.transpose(1, 0, 3, 2))

    # Size the occupied-index blocks from the memory still available.
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir ** 3 * 2 + nocc * nvir ** 2, nocc * nvir ** 2 * 5)
    blksize = min(nocc, max(ccsd.BLKMIN, int(max_memory * 0.95e6 / 8 / unit)))
    log.debug1(
        "block size = %d, nocc = %d is divided into %d blocks",
        blksize, nocc, int((nocc + blksize - 1) / blksize)
    )
    for p0, p1 in prange(0, nocc, blksize):
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = lib.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)

        l1new[p0:p1] += numpy.einsum("iabc,bc->ia", eris_ovvv, mba1) * 2
        l1new[p0:p1] -= numpy.einsum("ibca,bc->ia", eris_ovvv, mba1)
        #:l2new[p0:p1] += numpy.einsum('ic,jbac->jiba', l1, eris_ovvv)
        tmp = lib.dot(l1, eris_ovvv.reshape(-1, nvir).T)
        l2new[p0:p1] += tmp.reshape(nocc, -1, nvir, nvir).transpose(1, 0, 2, 3)
        tmp = None
        m4buf = numpy.empty((blksize, nocc, nvir, nvir))
        eris_ovvv = _cp(eris_ovvv.transpose(0, 2, 1, 3).reshape(-1, nvir ** 2))
        for j0, j1 in prange(0, nocc, blksize):
            #:m4 = numpy.einsum('ijkd,kadb->ijab', l2t1[j0:j1,:,p0:p1], eris_ovvv)
            m4 = m4buf[: j1 - j0]
            lib.dot(_cp(l2t1[j0:j1, :, p0:p1].reshape((j1 - j0) * nocc, -1)),
                    eris_ovvv, 1, m4.reshape(-1, nvir ** 2))
            l2new[j0:j1] -= m4
            l1new[j0:j1] -= numpy.einsum("ijab,jb->ia", m4, t1) * 2
            l1new -= numpy.einsum("ijab,ia->jb", m4, t1[j0:j1]) * 2
            l1new += numpy.einsum("jiab,jb->ia", m4, t1[j0:j1])
            l1new[j0:j1] += numpy.einsum("jiab,ia->jb", m4, t1)
        eris_ovvv = m4buf = m4 = None
        # ==== mem usage nvir**3*2 + nocc*nvir**2

        eris_ovov = _cp(eris.ovov[p0:p1])
        l1new[p0:p1] += numpy.einsum("jb,iajb->ia", l1, eris_ovov) * 2
        for i in range(p1 - p0):
            l2new[p0 + i] += eris_ovov[i].transpose(1, 0, 2) * 0.5
        #:l2new[p0:p1] -= numpy.einsum('icjb,ca->ijab', eris_ovov, mba1)
        #:l2new[p0:p1] -= numpy.einsum('jbka,ik->jiba', eris_ovov, mij1)
        tmp = numpy.empty((nocc, nvir, nvir))
        for j in range(p0, p1):
            lib.dot(eris_ovov[j - p0].reshape(nvir, -1).T, mba1, 1,
                    tmp.reshape(-1, nvir))
            l2new[j] -= tmp.transpose(0, 2, 1)
            lib.dot(
                mij1,
                _cp(eris_ovov[j - p0].transpose(1, 0, 2).reshape(nocc, -1)),
                -1, l2new[j].reshape(nocc, -1), 1
            )
        tmp = None
        l1new[p0:p1] += numpy.einsum("iajb,jb->ia", eris_ovov, mia1) * 2
        l1new[p0:p1] -= numpy.einsum("ibja,jb->ia", eris_ovov, mia1)
        m4buf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #:m4 = numpy.einsum('kalb,ijkl->ijab', eris_ovov, l2tau[j0:j1,:,p0:p1])
            m4 = m4buf[: j1 - j0]
            lib.dot(
                l2tau[j0:j1, :, p0:p1].reshape((j1 - j0) * nocc, -1).copy(),
                _cp(eris_ovov.transpose(0, 2, 1, 3).reshape(-1, nvir ** 2)),
                0.5,
                m4.reshape(-1, nvir ** 2),
            )
            l2new[j0:j1] += m4
            l1new[j0:j1] += numpy.einsum("ijab,jb->ia", m4, t1) * 4
            l1new[j0:j1] -= numpy.einsum("ijba,jb->ia", m4, t1) * 2
        eris_ovov = m4buf = m4 = None
        # ==== mem usage nocc*nvir**2 * 3

        eris_oovv = _cp(eris.oovv[p0:p1])
        l1new[p0:p1] -= numpy.einsum("jb,ijba->ia", l1, eris_oovv)
        eris_oovv = None

        saved_wooov = _cp(saved.wooov[p0:p1])
        #:l1new[p0:p1] -= numpy.einsum('jkca,ijkc->ia', l2, saved_wooov)
        l1new[p0:p1] -= lib.dot(saved_wooov.reshape(p1 - p0, -1),
                                l2.reshape(-1, nvir))
        saved_wovvv = _cp(saved.wovvv[p0:p1])
        #:l1new += numpy.einsum('kibc,kabc->ia', l2[p0:p1], saved_wovvv)
        for j in range(p1 - p0):
            lib.dot(l2[p0 + j].reshape(nocc, -1),
                    saved_wovvv[j].reshape(nvir, -1).T, 1, l1new, 1)
        saved_wooov = saved_wovvv = None
        # ==== mem usage nvir**3 + nocc**2*nvir

        saved_wOvOv = _cp(saved.wOvOv[p0:p1])
        tmp_ovov = _cp(saved.wOVov[p0:p1]) * 2
        tmp_ovov += saved_wOvOv
        tmp_ovov = lib.transpose(tmp_ovov.reshape(-1, nov)).reshape(
            nocc, nvir, -1, nvir)
        tmp1 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        tmp = numpy.empty((blksize, nvir, nocc, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #:tmp = l2[j0:j1].transpose(0,2,1,3) - l2[j0:j1].transpose(0,3,1,2)*.5
            #:l2new[p0:p1] += numpy.einsum('kcia,kcjb->jiba', tmp, tmp_ovov[j0:j1])
            for i in range(j1 - j0):
                tmp[i] = -0.5 * l2[j0 + i].transpose(2, 0, 1)
                tmp[i] += l2[j0 + i].transpose(1, 0, 2)
            lib.dot(
                tmp_ovov[j0:j1].reshape((j1 - j0) * nvir, -1).T,
                tmp[: j1 - j0].reshape((j1 - j0) * nvir, -1),
                1,
                tmp1.reshape(-1, nov),
            )
            # tmp1 is overwritten by every lib.dot above, so it has to be
            # accumulated inside the loop.
            l2new[p0:p1] += tmp1.transpose(0, 2, 1, 3)
        tmp = tmp1 = tmp_ovov = None
        # ==== mem usage nocc*nvir**2 * 5

        #:tmp = numpy.einsum('jkca,ibkc->ijab', l2, saved_wOvOv)
        tmp = numpy.empty((p1 - p0, nvir, nvir))
        for j in range(nocc):
            lib.dot(saved_wOvOv.reshape(-1, nov), l2[j].reshape(nov, -1), 1,
                    tmp.reshape(-1, nvir))
            l2new[p0:p1, j] += tmp.transpose(0, 2, 1)
            l2new[p0:p1, j] += tmp * 0.5
        saved_wOvOv = tmp = None

        saved_woooo = _cp(saved.woooo[p0:p1])
        #:m3 = numpy.einsum('klab,ijkl->ijab', l2, saved_woooo)
        m3 = lib.dot(saved_woooo.reshape(-1, nocc ** 2),
                     l2.reshape(nocc ** 2, -1), 0.5).reshape(-1, nocc, nvir, nvir)
        l2new[p0:p1] += m3
        l1new[p0:p1] += numpy.einsum("ijab,jb->ia", m3, t1) * 4
        l1new[p0:p1] -= numpy.einsum("ijba,jb->ia", m3, t1) * 2
        saved_woooo = m3 = None
        time1 = log.timer_debug1("lambda pass [%d:%d]" % (p0, p1), *time1)

    # Divide by differences of the Fock-matrix diagonal and add the old values.
    mo_e = eris.fock.diagonal()
    eia = lib.direct_sum("i-a->ia", mo_e[:nocc], mo_e[nocc:])
    l1new /= eia
    l1new += l1

    # l2new = l2new + l2new.transpose(1,0,3,2)
    # l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    # l2new += l2
    # Done pair by pair so no full-size temporary is created.
    for i in range(nocc):
        for j in range(i):
            dab = lib.direct_sum("a+b->ab", eia[i], eia[j])
            tmp = (l2new[i, j] + l2new[j, i].T) / dab + l2[i, j]
            l2new[i, j] = tmp
            l2new[j, i] = tmp.T
        dab = lib.direct_sum("a+b->ab", eia[i], eia[i])
        l2new[i, i] = (l2new[i, i] + l2new[i, i].T) / dab + l2[i, i]

    time0 = log.timer_debug1("update l1 l2", *time0)
    return l1new, l2new
def make_intermediates(mycc, t1, t2, eris, max_memory=2000):
    """Pre-compute the intermediates consumed by ``update_amps``.

    Two blocked passes over the leading occupied index are made.  Pass 1
    fills ``woooo``, ``wooov``, ``wOVov`` and ``wOvOv`` and stashes two
    helper tensors in a scratch HDF5 file; pass 2 reads them back to build
    ``wovvv``.  The ``#:`` comments give the einsum each ``lib.dot`` call is
    meant to reproduce.

    Args:
        mycc: object providing ``stdout`` and ``verbose`` for logging.
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2: 4-index array -- presumably ``(nocc, nocc, nvir, nvir)``.
        eris: integral container; ``fock``, ``ovvv``, ``ovov``, ``ooov``,
            ``ovoo``, ``oovv`` and ``oooo`` are read.
        max_memory: budget used (after subtracting ``lib.current_memory()``)
            to pick the block size -- presumably in MB; TODO confirm.

    Returns:
        A ``_Saved`` instance whose attributes ``woooo``, ``wooov``,
        ``wOVov``, ``wOvOv``, ``wovvv`` are HDF5 datasets in a temporary file
        and whose ``w1``..``w4`` are in-memory arrays.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    foo = eris.fock[:nocc, :nocc]
    fov = eris.fock[:nocc, nocc:]
    fvv = eris.fock[nocc:, nocc:]

    class _Saved(object):
        # Owns the temporary HDF5 file that backs the large intermediates.
        def __init__(self):
            self._tmpfile = tempfile.NamedTemporaryFile()
            # Explicit 'a': h5py >= 3 opens read-only by default, which
            # would make every create_dataset below fail.
            self.ftmp = h5py.File(self._tmpfile.name, 'a')

        def __del__(self):
            if hasattr(self, 'ftmp'):
                self.ftmp.close()
                self._tmpfile = None

    saved = _Saved()
    saved.woooo = saved.ftmp.create_dataset('woooo', (nocc, nocc, nocc, nocc), 'f8')
    saved.wooov = saved.ftmp.create_dataset('wooov', (nocc, nocc, nocc, nvir), 'f8')
    saved.wOVov = saved.ftmp.create_dataset('wOVov', (nocc, nvir, nocc, nvir), 'f8')
    saved.wOvOv = saved.ftmp.create_dataset('wOvOv', (nocc, nvir, nocc, nvir), 'f8')
    saved.wovvv = saved.ftmp.create_dataset('wovvv', (nocc, nvir, nvir, nvir), 'f8')

    # As we don't have l2 in memory, hold tau temporarily in memory
    w1 = fvv - numpy.einsum('ja,jb->ba', fov, t1)
    w2 = foo + numpy.einsum('ib,jb->ij', fov, t1)
    w3 = numpy.einsum('kc,jkbc->bj', fov, t2) * 2 + fov.T
    w3 -= numpy.einsum('kc,kjbc->bj', fov, t2)
    w3 += reduce(numpy.dot, (t1.T, fov, t1.T))
    w4 = fov.copy()

    # Scratch file carrying ov1/ov2 from pass 1 to pass 2; opened in 'a'
    # mode for the same h5py >= 3 reason as above.
    _tmpfile = tempfile.NamedTemporaryFile()
    fswap = h5py.File(_tmpfile.name, 'a')

    # time.clock() was removed in Python 3.8; use process_time() when it
    # exists and only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = cpu_clock(), time.time()
    max_memory = max_memory - lib.current_memory()[0]
    unit = max(nocc * nvir**2 * 4 + nvir**3 * 2,
               nvir**3 * 3 + nocc * nvir**2,
               nocc * nvir**2 * 6 + nocc**2 * nvir + nocc**3 + nocc**2 * nvir)
    blksize = max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1(
        'ccsd lambda make_intermediates: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) // blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        w1 += numpy.einsum('jcba,jc->ba', eris_ovvv, t1[p0:p1] * 2)
        w1 -= numpy.einsum('jabc,jc->ba', eris_ovvv, t1[p0:p1])
        #:w3 += numpy.einsum('kdcb,kjdc->bj', eris_ovvv, theta)
        for i in range(p1 - p0):
            theta = t2[p0 + i] * 2
            theta -= t2[p0 + i].transpose(0, 2, 1)
            w3 += lib.dot(eris_ovvv[i].reshape(-1, nvir).T,
                          _cp(theta.reshape(nocc, -1)).T)
        theta = None
        #:wOVov = numpy.einsum('jbcd,kd->jbkc', eris_ovvv, t1)
        #:wOvOv = numpy.einsum('jdcb,kd->jbkc', eris_ovvv, -t1)
        wOVov = lib.dot(eris_ovvv.reshape(-1, nvir), t1.T).reshape(
            -1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
        g2ovvv = _cp(eris_ovvv.transpose(0, 2, 3, 1))
        wOvOv = lib.dot(g2ovvv.reshape(-1, nvir), -t1.T).reshape(
            -1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
        for i in range(p1 - p0):
            g2ovvv[i] *= 2
            g2ovvv[i] -= eris_ovvv[i].transpose(1, 0, 2)
        wooov = numpy.empty((p1 - p0, nocc, nocc, nvir))
        woooo = numpy.empty((p1 - p0, nocc, nocc, nocc))
        eris_ovov = _cp(_cp(eris.ovov[p0:p1]).transpose(0, 2, 1, 3))
        for j0, j1 in prange(0, nocc, blksize):
            tau = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:wooov[:,j0:j1] = numpy.einsum('icbd,jkbd->ijkc', g2ovvv, tau)
            #:woooo[:,:,j0:j1] = numpy.einsum('icjd,klcd->ijkl', eris_ovov, tau)
            tmp = lib.dot(g2ovvv.reshape(-1, nvir**2), tau.reshape(-1, nvir**2).T)
            wooov[:, j0:j1] = tmp.reshape(-1, nvir, j1 - j0, nocc).transpose(0, 2, 3, 1)
            woooo[:, :, j0:j1] = lib.dot(
                eris_ovov.reshape(-1, nvir**2),
                tau.reshape(-1, nvir**2).T).reshape(-1, nocc, j1 - j0, nocc)
        eris_ovov = eris_ovvv = g2ovvv = tau = tmp = None
        #==== mem usage nocc*nvir**2*2 + nocc**2*nvir + nocc**3 + nvir**3*2 + nocc*nvir**2*2

        eris_ooov = _cp(eris.ooov[p0:p1])
        w2[p0:p1] += numpy.einsum('ijkb,kb->ij', eris_ooov, t1) * 2
        w2 -= numpy.einsum('kjib,kb->ij', eris_ooov, t1[p0:p1])
        #:w3 -= numpy.einsum('kjlc,klbc->bj', eris_ooov, theta)
        for i in range(p1 - p0):
            theta = t2[p0 + i].transpose(0, 2, 1) * 2
            theta -= t2[p0 + i]
            w3 -= lib.dot(theta.reshape(-1, nvir).T,
                          eris_ooov[i].reshape(nocc, -1).T)
        theta = None
        #:woooo += numpy.einsum('ikjc,lc->ijkl', eris_ooov, t1)
        #:wOvOv += numpy.einsum('jklb,lc->jbkc', eris_ooov, t1)
        woooo += lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape(
            (-1, nocc, nocc, nocc)).transpose(0, 2, 1, 3)
        for i in range(p1 - p0):
            lib.dot(_cp(eris_ooov[i].transpose(2, 0, 1)).reshape(-1, nocc),
                    t1, 1, wOvOv[i].reshape(-1, nvir), 1)
            wooov[i] += eris_ooov[i].transpose(1, 0, 2) * 2
            wooov[i] -= eris_ooov[i]

        eris_ovoo = _cp(eris.ovoo[p0:p1])
        #:woooo += numpy.einsum('icjl,kc->ijkl', eris_ovoo, t1)
        #:wOVov += numpy.einsum('jbkl,lc->jbkc', eris_ovoo, -t1)
        for i in range(p1 - p0):
            woooo[i] += lib.dot(t1, eris_ovoo[i].reshape(nvir, -1)).reshape(
                (nocc, ) * 3).transpose(1, 0, 2)
        lib.dot(eris_ovoo.reshape(-1, nocc), t1, -1, wOVov.reshape(-1, nvir), 1)
        #:wooov -= numpy.einsum('iblj,klbc->ijkc', eris_ovoo*1.5, t2)
        tmp_ovoo = _cp(-eris_ovoo.transpose(0, 2, 3, 1)).reshape(-1, nov)
        for j in range(nocc):
            wooov[:, :, j] += lib.dot(tmp_ovoo, t2[j].reshape(-1, nvir),
                                      1.5).reshape(-1, nocc, nvir)
        #:g2ooov = eris_ooov * 2 - eris_ovoo.transpose(0,3,2,1)
        # Reuse the tmp_ovoo buffer (already holds the negated term).
        g2ooov, tmp_ovoo = tmp_ovoo.reshape(p1 - p0, nocc, nocc, nvir), None
        g2ooov += eris_ooov * 2
        thetabuf = numpy.empty((blksize, nvir, nocc, nvir))
        vikjc = numpy.empty((p1 - p0, nocc, blksize, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            theta = thetabuf[:j1 - j0]
            for i in range(j1 - j0):
                theta[i] = t2[j0 + i].transpose(
                    1, 0, 2) * 2 - t2[j0 + i].transpose(2, 0, 1)
            #:vikjc = numpy.einsum('iklb,jlcb->ikjc', g2ooov, theta)
            if j1 == j0 + blksize:
                # Full-size block: write into the preallocated buffer.
                lib.dot(g2ooov.reshape(-1, nov), _cp(theta.reshape(-1, nov)).T,
                        1, vikjc.reshape((p1 - p0) * nocc, -1), 0)
            else:
                # Short trailing block: the buffer shape does not fit.
                vikjc = lib.dot(g2ooov.reshape(-1, nov),
                                _cp(theta.reshape(-1, nov)).T)
                vikjc = vikjc.reshape(p1 - p0, nocc, j1 - j0, nvir)
            wooov[:, j0:j1, :] += vikjc.transpose(0, 2, 1, 3)
            wooov[:, :, j0:j1] -= vikjc * .5
        eris_ooov = eris_ovoo = g2ooov = vikjc = theta = thetabuf = None
        #==== mem usage nocc*nvir**2*3 + nocc**2*nvir + nocc**3 + nocc*nvir**2 + nocc**2*nvir*3

        eris_ovov = _cp(eris.ovov[p0:p1])
        g2ovov = eris_ovov * 2
        g2ovov -= eris_ovov.transpose(0, 3, 2, 1)
        tmpw4 = numpy.einsum('kcld,ld->kc', g2ovov, t1)
        #:w1 -= numpy.einsum('kcja,kjcb->ba', g2ovov, t2[p0:p1])
        w1 -= lib.dot(t2[p0:p1].reshape(-1, nvir).T,
                      _cp(g2ovov.transpose(0, 2, 1, 3).reshape(-1, nvir)))
        w1 -= numpy.einsum('ja,jb->ba', tmpw4, t1[p0:p1])
        #:w2[p0:p1] += numpy.einsum('ibkc,jkbc->ij', g2ovov, t2)
        w2[p0:p1] += lib.dot(
            _cp(g2ovov.transpose(0, 2, 1, 3)).reshape(p1 - p0, -1),
            t2.reshape(nocc, -1).T)
        w2[p0:p1] += numpy.einsum('ib,jb->ij', tmpw4, t1)
        w3 += reduce(numpy.dot, (t1[p0:p1].T, tmpw4, t1.T))
        w4[p0:p1] += tmpw4
        vOVov = numpy.empty((nocc, nvir, p1 - p0, nvir))
        #:vOVov += numpy.einsum('jbld,klcd->kcjb', g2ovov, t2)
        #:vOVov -= numpy.einsum('jbld,kldc->kcjb', eris_ovov, t2)
        for j in range(nocc):
            lib.dot(_cp(t2[j].transpose(1, 0, 2)).reshape(-1, nov),
                    g2ovov.reshape(-1, nov).T, 1, vOVov[j].reshape(nvir, -1))
            lib.dot(t2[j].reshape(nov, -1).T, eris_ovov.reshape(-1, nov).T,
                    -1, vOVov[j].reshape(nvir, -1), 1)
        vOVov = lib.transpose(vOVov.reshape(nov, -1)).reshape(
            p1 - p0, nvir, nocc, nvir)
        vOVov += eris_ovov
        g2ovov = tmp = tmpw4 = None
        #==== mem usage nocc*nvir**2*4 + nocc**2*nvir + nocc**3 + nocc*nvir**2

        #:tmp = numpy.einsum('jbld,kd->jlbk', eris_ovov, t1)
        #:wOVov -= numpy.einsum('jlbk,lc->jbkc', tmp, t1)
        #:tmp = numpy.einsum('jdlb,kd->jlbk', eris_ovov, t1)
        #:wOvOv += numpy.einsum('jlbk,lc->jbkc', tmp, t1)
        tmp = numpy.empty((nocc, nvir, nocc))
        for j in range(p1 - p0):
            lib.dot(_cp(eris_ovov[j].transpose(1, 0, 2)).reshape(-1, nvir),
                    t1.T, 1, tmp.reshape(-1, nocc))
            lib.dot(tmp.reshape(nocc, -1).T, t1, -1,
                    wOVov[j].reshape(-1, nvir), 1)
            lib.dot(eris_ovov[j].reshape(nvir, -1).T, t1.T, 1,
                    tmp.reshape(-1, nocc))
            lib.dot(tmp.reshape(nocc, -1).T, t1, 1,
                    wOvOv[j].reshape(-1, nvir), 1)
        tmp = None

        #:vOvOv = numpy.einsum('jdlb,kldc->kcjb', eris_ovov, t2)
        ovovtmp = _cp(eris_ovov.transpose(0, 3, 2, 1)).reshape(-1, nov)
        vOvOv = numpy.empty((nocc, nvir, p1 - p0, nvir))
        for j in range(nocc):
            lib.dot(t2[j].reshape(-1, nvir).T, ovovtmp.T, 1,
                    vOvOv[j].reshape(nvir, -1))
        ovovtmp = eris_ovov = None
        vOvOv = lib.transpose(vOvOv.reshape(nov, -1)).reshape(
            p1 - p0, nvir, nocc, nvir)
        vOvOv -= _cp(eris.oovv[p0:p1]).transpose(0, 3, 1, 2)
        wOVov += vOVov
        wOvOv += vOvOv
        saved.wOVov[p0:p1] = wOVov
        saved.wOvOv[p0:p1] = wOvOv
        wOVov = wOvOv = None
        #==== mem usage nocc*nvir**2*6 + nocc**2*nvir + nocc**3 + nocc**2*nvir

        ov1 = vOvOv * 2 + vOVov
        #:wooov -= numpy.einsum('ibkc,jb->ijkc', ov1, t1)
        for i in range(p1 - p0):
            lib.dot(t1, ov1[i].reshape(nvir, -1), -1,
                    wooov[i].reshape(nocc, -1), 1)
        ov1 = lib.transpose(ov1.reshape(-1, nov))
        fswap['2vOvOv/%d' % istep] = ov1.reshape(nocc, nvir, -1, nvir)
        ov1 = None

        ov2 = vOVov * 2 + vOvOv
        w3 += numpy.einsum('kcjb,kc->bj', ov2, t1[p0:p1])
        #:wooov += numpy.einsum('ibjc,kb->ijkc', ov2, t1)
        for i in range(p1 - p0):
            wooov[i] += lib.dot(t1, ov2[i].reshape(nvir, -1)).reshape(
                nocc, nocc, nvir).transpose(1, 0, 2)
        ov2 = lib.transpose(ov2.reshape(-1, nov))
        fswap['2vovOV/%d' % istep] = ov2.reshape(nocc, nvir, -1, nvir)
        vOVov = vOvOv = None
        ov2 = None
        #==== mem usage nocc*nvir**2*5 + nocc**2*nvir + nocc**3

        woooo += _cp(eris.oooo[p0:p1]).transpose(0, 2, 1, 3)
        saved.woooo[p0:p1] = woooo
        saved.wooov[p0:p1] = wooov
        woooo = wooov = None
        time1 = log.timer_debug1('pass1 [%d:%d]' % (p0, p1), *time1)

    w3 += numpy.einsum('bc,jc->bj', w1, t1)
    w3 -= numpy.einsum('kj,kb->bj', w2, t1)

    for p0, p1 in prange(0, nocc, blksize):
        eris_ooov = _cp(eris.ooov[p0:p1])
        g2ooov = eris_ooov * 2
        g2ooov -= eris_ooov.transpose(0, 2, 1, 3)
        #:tmp = numpy.einsum('kjla,jb->kabl', g2ooov, t1)
        #:wovvv = numpy.einsum('kabl,lc->kabc', tmp, t1)
        #:wovvv += numpy.einsum('kjla,jlbc->kabc', g2ooov, t2)
        tmp = lib.dot(
            _cp(g2ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)).T,
            t1).reshape(-1, nocc, nvir, nvir).transpose(0, 2, 3, 1)
        wovvv = lib.dot(_cp(tmp.reshape(-1, nocc)), t1).reshape(-1, nvir, nvir, nvir)
        wovvv += lib.dot(
            _cp(g2ooov.transpose(0, 3, 1, 2).reshape(-1, nocc**2)),
            t2.reshape(nocc**2, -1)).reshape(-1, nvir, nvir, nvir)
        tmp = g2ooov = None
        # Reassemble ov1/ov2 for this row block from the per-step pieces that
        # pass 1 wrote to the scratch file.
        ov1 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        ov2 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        for istep, (j0, j1) in enumerate(prange(0, nocc, blksize)):
            ov1[:, :, j0:j1] = fswap['2vOvOv/%d' % istep][p0:p1]
            ov2[:, :, j0:j1] = fswap['2vovOV/%d' % istep][p0:p1]

        #:wovvv += numpy.einsum('kcja,jb->kabc', ov1, t1)
        #:wovvv -= numpy.einsum('kbja,jc->kabc', ov2, t1)
        wovvv += lib.dot(_cp(ov1.transpose(0, 1, 3, 2).reshape(-1, nocc)),
                         t1).reshape(-1, nvir, nvir, nvir).transpose(0, 2, 3, 1)
        wovvv -= lib.dot(_cp(ov2.transpose(0, 1, 3, 2).reshape(-1, nocc)),
                         t1).reshape(-1, nvir, nvir, nvir).transpose(0, 2, 1, 3)
        #==== mem usage nvir**3 + nocc*nvir**2*2
        eris_ooov = ov1 = ov2 = None

        for j0, j1 in prange(0, nocc, blksize):
            eris_ovvv = _cp(eris.ovvv[j0:j1])
            eris_ovvv = _ccsd.unpack_tril(
                eris_ovvv.reshape((j1 - j0) * nvir, -1))
            eris_ovvv = eris_ovvv.reshape(j1 - j0, nvir, nvir, nvir)
            #:wovvv += numpy.einsum('jabd,kjdc->kabc', eris_ovvv, t2[p0:p1,j0:j1]) * -1.5
            tmp_ovvv = numpy.empty((j1 - j0, nvir, nvir, nvir))
            for i in range(j1 - j0):
                tmp_ovvv[i] = eris_ovvv[i].transpose(1, 0, 2) * 2
            tmp = lib.dot(
                _cp(t2[p0:p1, j0:j1].transpose(0, 3, 1, 2).reshape(
                    (p1 - p0) * nvir, -1)),
                tmp_ovvv.reshape(-1, nvir**2),
                -1.5 / 2).reshape(-1, nvir, nvir, nvir)
            wovvv += tmp.transpose(0, 2, 3, 1)
            if p0 == j0:
                # Same-block term, added only when the inner block coincides
                # with the outer one.
                for i in range(p1 - p0):
                    tmp_ovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
                    wovvv[i] += tmp_ovvv[i]
            tmp = tmp_ovvv = None
            g2ovvv = numpy.empty((j1 - j0, nvir, nvir, nvir))
            for i in range(j1 - j0):
                g2ovvv[i] = eris_ovvv[i] * 2
                g2ovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
            #==== mem usage nvir**3*3
            eris_ovvv = None
            theta = _cp(t2[p0:p1, j0:j1].transpose(0, 2, 1, 3) * 2)
            for i in range(p1 - p0):
                theta[i] -= t2[p0 + i, j0:j1].transpose(2, 0, 1)
            #:vkbca = numpy.einsum('jdca,kbjd->kbca', g2ovvv, theta)
            vkbca = lib.dot(theta.reshape((p1 - p0) * nvir, -1),
                            g2ovvv.reshape(-1, nvir * nvir)).reshape(
                                -1, nvir, nvir, nvir)
            wovvv += vkbca.transpose(0, 3, 1, 2)
            wovvv -= vkbca.transpose(0, 3, 2, 1) * .5
            #==== mem usage nvir**3*3 + nocc*nvir**2
            # Release the big temporaries (the name used to be misspelled
            # 'vkabc', which left vkbca alive until the next iteration).
            g2ovvv = theta = vkbca = None
        saved.wovvv[p0:p1] = wovvv
        time1 = log.timer_debug1('pass2 [%d:%d]' % (p0, p1), *time1)

    fswap.close()
    saved.w1 = w1
    saved.w2 = w2
    saved.w3 = w3
    saved.w4 = w4
    saved.ftmp.flush()
    return saved
def update_amps(mycc, t1, t2, l1, l2, eris=None, saved=None, max_memory=2000):
    """Compute one update of the ``l1``/``l2`` arrays from the current values.

    NOTE(review): the names suggest these are CCSD Lambda-equation amplitudes
    -- confirm against the callers.  The ``#:`` comments give the einsum each
    ``lib.dot`` call below is meant to reproduce.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``add_wvvVV``.
        t1: 2-D array; its shape defines ``(nocc, nvir)``.
        t2: 4-index array -- presumably ``(nocc, nocc, nvir, nvir)``.
        l1: array with the same shape as ``t1``.
        l2: 4-index array, same layout as the returned ``l2new``.
        eris: integral container; ``fock``, ``ooov``, ``ovvv``, ``ovov`` and
            ``oovv`` are read.  Although the default is ``None`` it is
            dereferenced unconditionally, so callers must pass it.
        saved: intermediates object (``w1``..``w4``, ``wooov``, ``wovvv``,
            ``wOvOv``, ``wOVov``, ``woooo``).  Built with
            ``make_intermediates`` when ``None``.
        max_memory: budget forwarded to ``mycc.add_wvvVV`` and used (after
            subtracting ``lib.current_memory()``) to pick the block size --
            presumably in MB; TODO confirm.

    Returns:
        Tuple ``(l1new, l2new)`` with shapes ``(nocc, nvir)`` and
        ``(nocc, nocc, nvir, nvir)``.
    """
    if saved is None:
        saved = make_intermediates(mycc, t1, t2, eris)
    # time.clock() was removed in Python 3.8; use process_time() when it
    # exists and only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = time0 = cpu_clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fov = eris.fock[:nocc, nocc:]

    #:mba = numpy.einsum('klca,klcb->ba', l2, t2*2-t2.transpose(0,1,3,2))
    #:mij = numpy.einsum('ikcd,jkcd->ij', l2, t2*2-t2.transpose(0,1,3,2))
    #:theta = t2*2 - t2.transpose(0,1,3,2)
    theta = _ccsd.make_0132(t2, t2, 2, -1)
    mba = lib.dot(theta.reshape(-1, nvir).T, l2.reshape(-1, nvir))
    mij = lib.dot(l2.reshape(nocc, -1), theta.reshape(nocc, -1).T)
    theta = None
    mba1 = numpy.einsum('jc,jb->bc', l1, t1) + mba
    mij1 = numpy.einsum('kb,jb->kj', l1, t1) + mij
    mia1 = (t1 + numpy.einsum('kc,jkbc->jb', l1, t2) * 2
            - numpy.einsum('kc,jkcb->jb', l1, t2)
            - reduce(numpy.dot, (t1, l1.T, t1))
            - numpy.einsum('bd,jd->jb', mba, t1)
            - numpy.einsum('lj,lb->jb', mij, t1))

    tmp = mycc.add_wvvVV(numpy.zeros_like(l1), l2, eris, max_memory)
    l2new = numpy.empty((nocc, nocc, nvir, nvir))
    # tmp is indexed by a running pair counter over i >= j; expand it into
    # the full (i, j) and (j, i) blocks of l2new.
    ij = 0
    for i in range(nocc):
        for j in range(i):
            tmp1 = tmp[ij] * .5  # *.5 because of l2+l2.transpose(1,0,3,2) later
            l2new[i, j] = tmp1
            l2new[j, i] = tmp1.T
            ij += 1
        l2new[i, i] = tmp[ij] * .5
        ij += 1
    l1new = (numpy.einsum('ijab,jb->ia', l2new, t1) * 4
             - numpy.einsum('jiab,jb->ia', l2new, t1) * 2)
    tmp = tmp1 = None

    l1new += fov
    l1new += numpy.einsum('ib,ba->ia', l1, saved.w1)
    l1new -= numpy.einsum('ja,ij->ia', l1, saved.w2)
    l1new -= numpy.einsum('ik,ka->ia', mij, saved.w4)
    l1new -= numpy.einsum('ca,ic->ia', mba, saved.w4)
    l1new += numpy.einsum('ijab,bj->ia', l2, saved.w3) * 2
    l1new -= numpy.einsum('ijba,bj->ia', l2, saved.w3)

    l2new += numpy.einsum('ia,jb->ijab', l1, saved.w4)
    #:l2new += numpy.einsum('jibc,ca->jiba', l2, saved.w1)
    #:l2new -= numpy.einsum('kiba,jk->jiba', l2, saved.w2)
    lib.dot(l2.reshape(-1, nvir), saved.w1, 1, l2new.reshape(-1, nvir), 1)
    lib.dot(saved.w2, l2.reshape(nocc, -1), -1, l2new.reshape(nocc, -1), 1)

    eris_ooov = _cp(eris.ooov)
    l1new -= numpy.einsum('jkia,kj->ia', eris_ooov, mij1) * 2
    l1new += numpy.einsum('ikja,kj->ia', eris_ooov, mij1)
    #:l2new -= numpy.einsum('ka,kijb->jiba', l1, eris_ooov)
    lib.dot(
        _cp(eris_ooov.transpose(0, 2, 1, 3).reshape(nocc, -1)).T, l1, -1,
        l2new.reshape(-1, nvir), 1)
    eris_ooov = None

    tau = _ccsd.make_tau(t2, t1, t1)
    #:l2tau = numpy.einsum('ijcd,klcd->ijkl', l2, tau)
    l2tau = lib.dot(l2.reshape(nocc**2, -1),
                    tau.reshape(nocc**2, -1).T).reshape((nocc, ) * 4)
    tau = None
    #:l2t1 = numpy.einsum('jidc,kc->ijkd', l2, t1)
    l2t1 = lib.dot(l2.reshape(-1, nvir), t1.T).reshape(nocc, nocc, nvir, nocc)
    l2t1 = _cp(l2t1.transpose(1, 0, 3, 2))

    # Size the occupied-index blocks from the memory still available.
    max_memory = max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2 + nocc * nvir**2, nocc * nvir**2 * 5)
    blksize = min(nocc, max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit)))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks',
               blksize, nocc, int((nocc + blksize - 1) / blksize))
    for p0, p1 in prange(0, nocc, blksize):
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)

        l1new[p0:p1] += numpy.einsum('iabc,bc->ia', eris_ovvv, mba1) * 2
        l1new[p0:p1] -= numpy.einsum('ibca,bc->ia', eris_ovvv, mba1)
        #:l2new[p0:p1] += numpy.einsum('ic,jbac->jiba', l1, eris_ovvv)
        tmp = lib.dot(l1, eris_ovvv.reshape(-1, nvir).T)
        l2new[p0:p1] += tmp.reshape(nocc, -1, nvir, nvir).transpose(1, 0, 2, 3)
        tmp = None
        m4buf = numpy.empty((blksize, nocc, nvir, nvir))
        eris_ovvv = _cp(eris_ovvv.transpose(0, 2, 1, 3).reshape(-1, nvir**2))
        for j0, j1 in prange(0, nocc, blksize):
            #:m4 = numpy.einsum('ijkd,kadb->ijab', l2t1[j0:j1,:,p0:p1], eris_ovvv)
            m4 = m4buf[:j1 - j0]
            lib.dot(_cp(l2t1[j0:j1, :, p0:p1].reshape((j1 - j0) * nocc, -1)),
                    eris_ovvv, 1, m4.reshape(-1, nvir**2))
            l2new[j0:j1] -= m4
            l1new[j0:j1] -= numpy.einsum('ijab,jb->ia', m4, t1) * 2
            l1new -= numpy.einsum('ijab,ia->jb', m4, t1[j0:j1]) * 2
            l1new += numpy.einsum('jiab,jb->ia', m4, t1[j0:j1])
            l1new[j0:j1] += numpy.einsum('jiab,ia->jb', m4, t1)
        eris_ovvv = m4buf = m4 = None
        #==== mem usage nvir**3*2 + nocc*nvir**2

        eris_ovov = _cp(eris.ovov[p0:p1])
        l1new[p0:p1] += numpy.einsum('jb,iajb->ia', l1, eris_ovov) * 2
        for i in range(p1 - p0):
            l2new[p0 + i] += eris_ovov[i].transpose(1, 0, 2) * .5
        #:l2new[p0:p1] -= numpy.einsum('icjb,ca->ijab', eris_ovov, mba1)
        #:l2new[p0:p1] -= numpy.einsum('jbka,ik->jiba', eris_ovov, mij1)
        tmp = numpy.empty((nocc, nvir, nvir))
        for j in range(p0, p1):
            lib.dot(eris_ovov[j - p0].reshape(nvir, -1).T, mba1, 1,
                    tmp.reshape(-1, nvir))
            l2new[j] -= tmp.transpose(0, 2, 1)
            lib.dot(
                mij1,
                _cp(eris_ovov[j - p0].transpose(1, 0, 2).reshape(nocc, -1)),
                -1, l2new[j].reshape(nocc, -1), 1)
        tmp = None
        l1new[p0:p1] += numpy.einsum('iajb,jb->ia', eris_ovov, mia1) * 2
        l1new[p0:p1] -= numpy.einsum('ibja,jb->ia', eris_ovov, mia1)
        m4buf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #:m4 = numpy.einsum('kalb,ijkl->ijab', eris_ovov, l2tau[j0:j1,:,p0:p1])
            m4 = m4buf[:j1 - j0]
            lib.dot(
                l2tau[j0:j1, :, p0:p1].reshape((j1 - j0) * nocc, -1).copy(),
                _cp(eris_ovov.transpose(0, 2, 1, 3).reshape(-1, nvir**2)),
                .5, m4.reshape(-1, nvir**2))
            l2new[j0:j1] += m4
            l1new[j0:j1] += numpy.einsum('ijab,jb->ia', m4, t1) * 4
            l1new[j0:j1] -= numpy.einsum('ijba,jb->ia', m4, t1) * 2
        eris_ovov = m4buf = m4 = None
        #==== mem usage nocc*nvir**2 * 3

        eris_oovv = _cp(eris.oovv[p0:p1])
        l1new[p0:p1] -= numpy.einsum('jb,ijba->ia', l1, eris_oovv)
        eris_oovv = None

        saved_wooov = _cp(saved.wooov[p0:p1])
        #:l1new[p0:p1] -= numpy.einsum('jkca,ijkc->ia', l2, saved_wooov)
        l1new[p0:p1] -= lib.dot(saved_wooov.reshape(p1 - p0, -1),
                                l2.reshape(-1, nvir))
        saved_wovvv = _cp(saved.wovvv[p0:p1])
        #:l1new += numpy.einsum('kibc,kabc->ia', l2[p0:p1], saved_wovvv)
        for j in range(p1 - p0):
            lib.dot(l2[p0 + j].reshape(nocc, -1),
                    saved_wovvv[j].reshape(nvir, -1).T, 1, l1new, 1)
        saved_wooov = saved_wovvv = None
        #==== mem usage nvir**3 + nocc**2*nvir

        saved_wOvOv = _cp(saved.wOvOv[p0:p1])
        tmp_ovov = _cp(saved.wOVov[p0:p1]) * 2
        tmp_ovov += saved_wOvOv
        tmp_ovov = lib.transpose(tmp_ovov.reshape(-1, nov)).reshape(
            nocc, nvir, -1, nvir)
        tmp1 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        tmp = numpy.empty((blksize, nvir, nocc, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #:tmp = l2[j0:j1].transpose(0,2,1,3) - l2[j0:j1].transpose(0,3,1,2)*.5
            #:l2new[p0:p1] += numpy.einsum('kcia,kcjb->jiba', tmp, tmp_ovov[j0:j1])
            for i in range(j1 - j0):
                tmp[i] = -.5 * l2[j0 + i].transpose(2, 0, 1)
                tmp[i] += l2[j0 + i].transpose(1, 0, 2)
            lib.dot(tmp_ovov[j0:j1].reshape((j1 - j0) * nvir, -1).T,
                    tmp[:j1 - j0].reshape((j1 - j0) * nvir, -1), 1,
                    tmp1.reshape(-1, nov))
            # tmp1 is overwritten by every lib.dot above, so it has to be
            # accumulated inside the loop.
            l2new[p0:p1] += tmp1.transpose(0, 2, 1, 3)
        tmp = tmp1 = tmp_ovov = None
        #==== mem usage nocc*nvir**2 * 5

        #:tmp = numpy.einsum('jkca,ibkc->ijab', l2, saved_wOvOv)
        tmp = numpy.empty((p1 - p0, nvir, nvir))
        for j in range(nocc):
            lib.dot(saved_wOvOv.reshape(-1, nov), l2[j].reshape(nov, -1), 1,
                    tmp.reshape(-1, nvir))
            l2new[p0:p1, j] += tmp.transpose(0, 2, 1)
            l2new[p0:p1, j] += tmp * .5
        saved_wOvOv = tmp = None

        saved_woooo = _cp(saved.woooo[p0:p1])
        #:m3 = numpy.einsum('klab,ijkl->ijab', l2, saved_woooo)
        m3 = lib.dot(saved_woooo.reshape(-1, nocc**2),
                     l2.reshape(nocc**2, -1), .5).reshape(-1, nocc, nvir, nvir)
        l2new[p0:p1] += m3
        l1new[p0:p1] += numpy.einsum('ijab,jb->ia', m3, t1) * 4
        l1new[p0:p1] -= numpy.einsum('ijba,jb->ia', m3, t1) * 2
        saved_woooo = m3 = None
        time1 = log.timer_debug1('lambda pass [%d:%d]' % (p0, p1), *time1)

    # Divide by differences of the Fock-matrix diagonal and add the old values.
    mo_e = eris.fock.diagonal()
    eia = lib.direct_sum('i-a->ia', mo_e[:nocc], mo_e[nocc:])
    l1new /= eia
    l1new += l1

    # l2new = l2new + l2new.transpose(1,0,3,2)
    # l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    # l2new += l2
    # Done pair by pair so no full-size temporary is created.
    for i in range(nocc):
        for j in range(i):
            dab = lib.direct_sum('a+b->ab', eia[i], eia[j])
            tmp = (l2new[i, j] + l2new[j, i].T) / dab + l2[i, j]
            l2new[i, j] = tmp
            l2new[j, i] = tmp.T
        dab = lib.direct_sum('a+b->ab', eia[i], eia[i])
        l2new[i, i] = (l2new[i, i] + l2new[i, i].T) / dab + l2[i, i]

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new
def gamma2_outcore(mycc, t1, t2, l1, l2, h5fobj):
    """Build the RDM intermediates block by block and store them in ``h5fobj``.

    The work is split into three passes whose block sizes are derived from
    ``mycc.max_memory``; partial results that are needed again in a later
    pass are kept in a temporary HDF5 swap file.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2: array indexed as ``[i, j, a, b]`` -- presumably of shape
            ``(nocc, nocc, nvir, nvir)``; confirm against the caller.
        l1: array used with the same index pattern as ``t1``.
        l2: array used with the same index pattern as ``t2``.
        h5fobj: writable HDF5 file/group; the datasets ``dovov``, ``dvvvv``,
            ``doooo``, ``doovv``, ``dovvo``, ``dooov`` and the group
            ``dovvv`` are created in it.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, None, dovvv, dooov)`` of
        handles into ``h5fobj``.  ``dvvvv`` has shape
        ``(nvir_pair, nvir_pair)`` with ``nvir_pair = nvir*(nvir+1)//2``;
        ``dovvv`` is a group holding one dataset per pass-3 block.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir+1) //2
    dovov = h5fobj.create_dataset('dovov', (nocc,nvir,nocc,nvir), 'f8')
    dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair,nvir_pair), 'f8')
    doooo = h5fobj.create_dataset('doooo', (nocc,nocc,nocc,nocc), 'f8')
    doovv = h5fobj.create_dataset('doovv', (nocc,nocc,nvir,nvir), 'f8')
    dovvo = h5fobj.create_dataset('dovvo', (nocc,nvir,nvir,nocc), 'f8')
    dooov = h5fobj.create_dataset('dooov', (nocc,nocc,nocc,nvir), 'f8')

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open the swap file explicitly for writing: recent h5py releases default
    # to read-only mode, which cannot initialise the empty temporary file.
    fswap = h5py.File(_tmpfile.name, 'w')
    mOvOv = fswap.create_dataset('mOvOv', (nocc,nvir,nocc,nvir), 'f8')
    mOVov = fswap.create_dataset('mOVov', (nocc,nvir,nocc,nvir), 'f8')

    moo = numpy.empty((nocc,nocc))
    mvv = numpy.zeros((nvir,nvir))

    # ---- pass 1: l2/t2 contractions, blocked over the first index of t2 ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc*nvir**2 * 5
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('rdm intermediates pass 1: block size = %d, nocc = %d in %d blocks',
               blksize, nocc, int((nocc+blksize-1)/blksize))
    time1 = time.clock(), time.time()
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        #:theta = make_theta(t2[p0:p1])
        #:pOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2[p0:p1])
        #:pOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2[p0:p1])
        #:pOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
        pOvOv = numpy.empty((nocc,p1-p0,nvir,nvir))
        pOVov = numpy.empty((nocc,p1-p0,nvir,nvir))
        t2a = numpy.empty((p1-p0,nvir,nocc,nvir))
        t2b = numpy.empty((p1-p0,nvir,nocc,nvir))
        theta = make_theta(t2[p0:p1])
        tmp = numpy.empty_like(t2a)
        for i in range(p1-p0):
            t2a[i] = t2[p0+i].transpose(2,0,1)
            t2b[i] = t2[p0+i].transpose(1,0,2)
            tmp[i] = theta[i].transpose(1,0,2)
        t2a = t2a.reshape(-1,nov)
        t2b = t2b.reshape(-1,nov)
        theta, tmp = tmp.reshape(-1,nov), None
        for i in range(nocc):
            pOvOv[i] = lib.dot(t2a, l2[i].reshape(nov,-1)).reshape(-1,nvir,nvir)
            pOVov[i] = lib.dot(t2b, l2[i].reshape(nov,-1), -1).reshape(-1,nvir,nvir)
            pOVov[i] += lib.dot(theta, _cp(l2[i].transpose(0,2,1).reshape(nov,-1))).reshape(-1,nvir,nvir)
        theta = t2a = t2b = None
        mOvOv[p0:p1] = pOvOv.transpose(1,2,0,3)
        mOVov[p0:p1] = pOVov.transpose(1,2,0,3)
        # Transposed copies, one dataset per block, re-read in pass 3.
        fswap['mvOvO/%d'%istep] = pOvOv.transpose(3,1,2,0)
        fswap['mvOVo/%d'%istep] = pOVov.transpose(3,1,2,0)
        moo[p0:p1] =(numpy.einsum('ljdd->jl', pOvOv) * 2
                   + numpy.einsum('ljdd->jl', pOVov))
        mvv +=(numpy.einsum('llbd->bd', pOvOv[p0:p1]) * 2
             + numpy.einsum('llbd->bd', pOVov[p0:p1]))
        pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass1 [%d:%d]'%(p0, p1), *time1)
    mia =(numpy.einsum('kc,ikac->ia', l1, t2) * 2
        - numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo*.5

    gooov = numpy.einsum('ji,ka->jkia', moo*-.5, t1)

    # ---- pass 2: doooo, dovvo, doovv, dovov; gooov is accumulated ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc**3 + nocc**2*nvir + nocc*nvir**2*6
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('rdm intermediates pass 2: block size = %d, nocc = %d in %d blocks',
               blksize, nocc, int((nocc+blksize-1)/blksize))
    for p0, p1 in prange(0, nocc, blksize):
        tau = _ccsd.make_tau(t2[p0:p1], t1[p0:p1], t1)
        #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
        goooo = lib.dot(tau.reshape(-1,nvir**2), l2.reshape(-1,nvir**2).T, .5)
        goooo = goooo.reshape(-1,nocc,nocc,nocc)
        h5fobj['doooo'][p0:p1] = make_theta(goooo).transpose(0,2,1,3)

        #:gooov[p0:p1] -= numpy.einsum('ib,jkba->jkia', l1, tau)
        #:gooov[p0:p1] -= numpy.einsum('jkba,ib->jkia', l2[p0:p1], t1)
        #:gooov[p0:p1] += numpy.einsum('jkil,la->jkia', goooo, t1*2)
        for i in range(p0,p1):
            gooov[i] -= lib.dot(_cp(tau[i-p0].transpose(0,2,1).reshape(-1,nvir)),
                                l1.T).reshape(nocc,nvir,nocc).transpose(0,2,1)
            gooov[i] -= lib.dot(_cp(l2[i].transpose(0,2,1).reshape(-1,nvir)),
                                t1.T).reshape(nocc,nvir,nocc).transpose(0,2,1)
        lib.dot(goooo.reshape(-1,nocc), t1, 2, gooov[p0:p1].reshape(-1,nvir), 1)

        #:goovv -= numpy.einsum('jk,ikab->ijab', mij, tau)
        goovv = numpy.einsum('ia,jb->ijab', mia[p0:p1], t1)
        for i in range(p1-p0):
            lib.dot(mij, tau[i].reshape(nocc,-1), -1, goovv[i].reshape(nocc,-1), 1)
            goovv[i] += .5 * l2[p0+i]
            goovv[i] += .5 * tau[i]
        #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2[p0:p1])
        #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
        lib.dot(t2[p0:p1].reshape(-1,nvir), mab, -1, goovv.reshape(-1,nvir), 1)
        lib.dot(tau.reshape(-1,nvir), mvv.T, -.5, goovv.reshape(-1,nvir), 1)
        tau = None
        #==== mem usage nocc**3 + nocc*nvir**2

        pOvOv = _cp(mOvOv[p0:p1])
        pOVov = _cp(mOVov[p0:p1])
        #:gooov[p0:p1,:] += numpy.einsum('jaic,kc->jkia', pOvOv, t1)
        #:gooov[:,p0:p1] -= numpy.einsum('kaic,jc->jkia', pOVov, t1)
        tmp = lib.dot(pOvOv.reshape(-1,nvir), t1.T).reshape(p1-p0,-1,nocc,nocc)
        gooov[p0:p1,:] += tmp.transpose(0,3,2,1)
        lib.dot(t1, pOVov.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1), 0)
        gooov[:,p0:p1] -= tmp.reshape(nocc,p1-p0,nvir,nocc).transpose(0,1,3,2)
        #:tmp = numpy.einsum('ikac,jc->jika', l2, t1[p0:p1])
        #:gOvVo -= numpy.einsum('jika,kb->jabi', tmp, t1)
        #:gOvvO = numpy.einsum('jkia,kb->jabi', tmp, t1) + pOvOv.transpose(0,3,1,2)
        tmp = tmp.reshape(-1,nocc,nocc,nvir)
        lib.dot(t1[p0:p1], l2.reshape(-1,nvir).T, 1, tmp.reshape(p1-p0,-1))
        gOvVo = numpy.einsum('ia,jb->jabi', l1, t1[p0:p1])
        gOvvO = numpy.empty((p1-p0,nvir,nvir,nocc))
        for i in range(p1-p0):
            gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0,2,1).reshape(-1,nocc)),
                                t1).reshape(nocc,nvir,-1).transpose(1,2,0)
            gOvVo[i] += pOVov[i].transpose(2,0,1)
            gOvvO[i] = lib.dot(tmp[i].reshape(nocc,-1).T,
                               t1).reshape(nocc,nvir,-1).transpose(1,2,0)
            gOvvO[i] += pOvOv[i].transpose(2,0,1)
        tmp = None
        #==== mem usage nocc**3 + nocc*nvir**2*6
        dovvo[p0:p1] = (gOvVo*2 + gOvvO).transpose(0,2,1,3)
        gOvvO *= -2
        gOvvO -= gOvVo
        doovv[p0:p1] = gOvvO.transpose(0,3,1,2)
        gOvvO = gOvVo = None

        for j0, j1 in prange(0, nocc, blksize):
            tau2 = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
            lib.dot(goooo[:,:,j0:j1].copy().reshape((p1-p0)*nocc,-1),
                    tau2.reshape(-1,nvir**2), 1, goovv.reshape(-1,nvir**2), 1)
            tau2 += numpy.einsum('ia,jb->ijab', t1[j0:j1], t1)
            tau2 = _cp(tau2.transpose(0,3,1,2).reshape(-1,nov))
            #:goovv[:,j0:j1] += numpy.einsum('ibld,jlda->ijab', pOvOv, tau2) * .5
            #:goovv[:,j0:j1] -= numpy.einsum('iald,jldb->ijab', pOVov, tau2) * .5
            goovv[:,j0:j1] += lib.dot(pOvOv.reshape(-1,nov), tau2.T,
                                      .5).reshape(p1-p0,nvir,-1,nvir).transpose(0,2,3,1)
            goovv[:,j0:j1] += lib.dot(pOVov.reshape(-1,nov), tau2.T,
                                      -.5).reshape(p1-p0,nvir,-1,nvir).transpose(0,2,1,3)
            tau2 = None
        #==== mem usage nocc**3 + nocc*nvir**2*7
        #:goovv += numpy.einsum('iald,jlbd->ijab', pOVov*2+pOvOv, t2) * .5
        pOVov *= 2
        pOVov += pOvOv
        for j in range(nocc):
            tmp = lib.dot(pOVov.reshape(-1,nov),
                          _cp(t2[j].transpose(0,2,1).reshape(-1,nvir)), .5)
            goovv[:,j] += tmp.reshape(-1,nvir,nvir)
            tmp = None
        dovov[p0:p1] = make_theta(goovv).transpose(0,2,1,3)
        goooo = goovv = pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass2 [%d:%d]'%(p0, p1), *time1)

    h5fobj['dooov'][:] = gooov.transpose(0,2,1,3)*2 - gooov.transpose(1,2,0,3)
    gooov = None

    # ---- pass 3: dvvvv and dovvv, blocked over a virtual index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nocc**2*nvir*2+nocc*nvir**2*2, nvir**3*2+nocc*nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    iobuflen = int(256e6/8/blksize)
    # This pass is blocked over nvir, so report nvir (the original passed nocc
    # for the value labelled "nvir").
    log.debug1('rdm intermediates pass 3: block size = %d, nvir = %d in %d blocks',
               blksize, nvir, int((nvir+blksize-1)/blksize))
    h5fobj.create_group('dovvv')
    for istep, (p0, p1) in enumerate(prange(0, nvir, blksize)):
        pvOvO = numpy.empty((p1-p0,nocc,nvir,nocc))
        pvOVo = numpy.empty((p1-p0,nocc,nvir,nocc))
        ao2mo.outcore._load_from_h5g(fswap['mvOvO'], p0, p1, pvOvO)
        ao2mo.outcore._load_from_h5g(fswap['mvOVo'], p0, p1, pvOVo)
        #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
        #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
        gvovv = lib.dot(pvOVo.reshape(-1,nocc), t1).reshape(-1,nocc,nvir,nvir)
        for i in range(p1-p0):
            gvovv[i] = gvovv[i].transpose(0,2,1)
        lib.dot(pvOvO.reshape(-1,nocc), t1, -1, gvovv.reshape(-1,nvir), 1)
        pvOvO = pvOVo = None
        #==== mem usage nocc**2*nvir*2 + nocc*nvir**2*2

        l2tmp = l2[:,:,p0:p1] * .5
        #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2tmp, t2)
        #:jabc = numpy.einsum('ijab,ic->jabc', l2tmp, t1)
        #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
        gvvvv = lib.dot(l2tmp.reshape(nocc**2,-1).T, t2.reshape(nocc**2,-1))
        jabc = lib.dot(l2tmp.reshape(nocc,-1).T, t1)
        lib.dot(jabc.reshape(nocc,-1).T, t1, 1, gvvvv.reshape(-1,nvir), 1)
        gvvvv = gvvvv.reshape(-1,nvir,nvir,nvir)
        l2tmp = jabc = None

        #:gvovv = numpy.einsum('ja,jibc->aibc', l1[:,p0:p1], t2)
        #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1[:,p0:p1])
        lib.dot(l1[:,p0:p1].copy().T, t2.reshape(nocc,-1), 1,
                gvovv.reshape(p1-p0,-1), 1)
        lib.dot(t1[:,p0:p1].copy().T, l2.reshape(nocc,-1), 1,
                gvovv.reshape(p1-p0,-1), 1)
        tmp = numpy.einsum('ja,jb->ab', l1[:,p0:p1], t1)
        gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
        gvovv += numpy.einsum('ba,ic->aibc', mvv[:,p0:p1]*.5, t1)
        #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
        for j in range(p1-p0):
            lib.dot(t1, gvvvv[j].reshape(nvir,-1), -2,
                    gvovv[j].reshape(nocc,-1), 1)

        # symmetrize dvvvv because it is symmetrized in ccsd_grad and make_rdm2 anyway
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(dvvvv+dvvvv.transpose(1,0,3,2))
        # now dvvvv == dvvvv.transpose(2,3,0,1) == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
        tmp = numpy.empty((nvir,nvir,nvir))
        tmp1 = numpy.empty((nvir,nvir,nvir))
        tmpvvvv = numpy.empty((p1-p0,nvir,nvir_pair))
        for i in range(p1-p0):
            make_theta(gvvvv[i:i+1], out=tmp)
            tmp1[:] = tmp.transpose(1,0,2)
            _ccsd.precontract(tmp1, diag_fac=2, out=tmpvvvv[i])
        # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
        for i in range(p0, p1):
            for j in range(p0, i):
                tmpvvvv[i-p0,j] += tmpvvvv[j-p0,i]
            tmpvvvv[i-p0,i] *= 2
        for i in range(p0, p1):
            off = i * (i+1) // 2
            if p0 > 0:
                # add the contributions stored by earlier blocks
                tmpvvvv[i-p0,:p0] += dvvvv[off:off+p0]
            dvvvv[off:off+i+1] = tmpvvvv[i-p0,:i+1] * .25
        for i in range(p1, nvir):
            # rows finalised by later blocks; stored unscaled for now
            off = i * (i+1) // 2
            dvvvv[off+p0:off+p1] = tmpvvvv[:,i]
        tmp = tmp1 = tmpvvvv = None
        #==== mem usage nvir**3 + nocc*nvir**2

        gvvov = make_theta(gvovv).transpose(0,2,1,3)
        ao2mo.outcore._transpose_to_h5g(h5fobj, 'dovvv/%d'%istep,
                                        gvvov.reshape(-1,nov), iobuflen)
        gvvvv = None
        gvovv = None
        time1 = log.timer_debug1('rdm intermediates pass3 [%d:%d]'%(p0, p1), *time1)

    del(fswap['mOvOv'])
    del(fswap['mOVov'])
    del(fswap['mvOvO'])
    del(fswap['mvOVo'])
    fswap.close()
    _tmpfile = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'], h5fobj['doovv'],
            h5fobj['dovvo'], None, h5fobj['dovvv'], h5fobj['dooov'])
def make_intermediates(mycc, t1, t2, eris):
    """Assemble intermediates from ``t1``, ``t2`` and the integrals in ``eris``.

    Args:
        mycc: object providing ``stdout`` and ``verbose`` (used for the logger).
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2: array contracted as ``[j, k, b, c]`` -- presumably of shape
            ``(nocc, nocc, nvir, nvir)``; confirm against the caller.
        eris: integral container; this function reads ``fock``, ``ovov``,
            ``ovoo``, ``oooo``, ``ovvo``, ``oovv``, ``vvvv`` and calls
            ``get_ovvv()``.

    Returns:
        An ``_IMDS`` instance.  ``woooo``, ``wovvo``, ``woVVo``, ``woovo`` and
        ``wvvvo`` are datasets in the temporary file ``imds.ftmp``; ``v1``,
        ``v2``, ``w3`` and ``v4`` are kept as in-memory arrays.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    # Blocks of the Fock matrix: first nocc rows/columns vs. the rest.
    foo = eris.fock[:nocc,:nocc]
    fov = eris.fock[:nocc,nocc:]
    fvo = eris.fock[nocc:,:nocc]
    fvv = eris.fock[nocc:,nocc:]

    tau = _ccsd.make_tau(t2, t1, t1)
    ovov = np.asarray(eris.ovov)
    ovoo = np.asarray(eris.ovoo)
    # "2 * direct - exchange"-style combinations of the integrals.
    ovov1 = ovov * 2 - ovov.transpose(0,3,2,1)
    ovoo1 = ovoo * 2 - ovoo.transpose(2,1,0,3)

    v1 = fvv - lib.einsum('ja,jb->ba', fov, t1)
    v1 -= lib.einsum('jakc,jkbc->ba', ovov1, tau)
    v2 = foo + lib.einsum('ib,jb->ij', fov, t1)
    v2 += lib.einsum('ibkc,jkbc->ij', ovov1, tau)
    v2 += np.einsum('kbij,kb->ij', ovoo1, t1)
    v4 = fov + np.einsum('jbkc,kc->jb', ovov1, t1)

    # v5 is only a partial sum here; it is folded into w3 near the end.
    v5 = np.einsum('kc,jkbc->bj', fov, t2) * 2
    v5 -= np.einsum('kc,jkcb->bj', fov, t2)
    v5 += fvo
    v5 += lib.einsum('kc,kb,jc->bj', v4, t1, t1)
    v5 -= lib.einsum('lckj,klbc->bj', ovoo1, t2)

    oooo = np.asarray(eris.oooo)
    woooo = lib.einsum('icjl,kc->ikjl', ovoo, t1)
    woooo += lib.einsum('jcil,kc->iljk', ovoo, t1)
    woooo += oooo.copy()
    woooo += lib.einsum('icjd,klcd->ikjl', ovov, tau)

    theta = t2*2 - t2.transpose(0,1,3,2)
    v4OVvo = lib.einsum('ldjb,klcd->jbck', ovov1, t2)
    v4OVvo -= lib.einsum('ldjb,kldc->jbck', ovov, t2)
    v4OVvo += np.asarray(eris.ovvo)

    v4oVVo = lib.einsum('jdlb,kldc->jbck', ovov, t2)
    v4oVVo -= np.asarray(eris.oovv).transpose(0,3,2,1)

    # v4ovvo is a fresh array, so the later in-place updates of
    # v4OVvo / v4oVVo (through the wOVvo / woVVo aliases) do not affect it.
    v4ovvo = v4OVvo*2 + v4oVVo
    w3 = np.einsum('jbck,jb->ck', v4ovvo, t1)

    woovo = lib.einsum('ibck,jb->ijck', v4ovvo, t1)
    woovo = woovo - woovo.transpose(0,3,2,1)
    woovo += lib.einsum('ibck,jb->ikcj', v4OVvo-v4oVVo, t1)
    woovo += ovoo1.conj().transpose(3,2,1,0)
    woovo += lib.einsum('lcik,jlbc->ikbj', ovoo1, theta)
    woovo -= lib.einsum('lcik,jlbc->ijbk', ovoo1, t2)
    woovo -= lib.einsum('iclk,ljbc->ijbk', ovoo1, t2)

    wvvvo = lib.einsum('jack,jb->back', v4ovvo, t1)
    wvvvo = wvvvo - wvvvo.transpose(2,1,0,3)
    wvvvo += lib.einsum('jack,jb->cabk', v4OVvo-v4oVVo, t1)
    wvvvo -= lib.einsum('lajk,jlbc->cabk', ovoo1, tau)

    # wOVvo / woVVo are aliases (not copies) of v4OVvo / v4oVVo; the in-place
    # updates below therefore must come after every use of the v4* arrays.
    wOVvo = v4OVvo
    woVVo = v4oVVo
    wOVvo -= np.einsum('jbld,kd,lc->jbck', ovov, t1, t1)
    woVVo += np.einsum('jdlb,kd,lc->jbck', ovov, t1, t1)
    wOVvo -= lib.einsum('jblk,lc->jbck', ovoo, t1)
    woVVo += lib.einsum('lbjk,lc->jbck', ovoo, t1)
    v4ovvo = v4OVvo = v4oVVo = None

    ovvv = np.asarray(eris.get_ovvv())
    wvvvo += lib.einsum('kacd,kjbd->bacj', ovvv, t2) * 1.5

    wOVvo += lib.einsum('jbcd,kd->jbck', ovvv, t1)
    woVVo -= lib.einsum('jdcb,kd->jbck', ovvv, t1)

    # From here on ``ovvv`` holds the 2*ovvv - transposed combination, not
    # the plain integrals.
    ovvv = ovvv*2 - ovvv.transpose(0,3,2,1)
    v1 += np.einsum('jcba,jc->ba', ovvv, t1)
    v5 += lib.einsum('kdbc,jkcd->bj', ovvv, t2)
    woovo += lib.einsum('idcb,jkdb->ijck', ovvv, tau)

    tmp = lib.einsum('kdca,jkbd->cabj', ovvv, theta)
    wvvvo -= tmp
    wvvvo += tmp.transpose(2,1,0,3) * .5
    wvvvo -= ovvv.conj().transpose(3,2,1,0)
    ovvv = tmp = None

    # w3 uses the final v1 (including the ovvv term added above) and v2.
    w3 += v5
    w3 += np.einsum('cb,jb->cj', v1, t1)
    w3 -= np.einsum('jk,jb->bk', v2, t1)

    # Plain attribute container for the results.
    class _IMDS: pass
    imds = _IMDS()
    imds.ftmp = lib.H5TmpFile()
    dtype = np.result_type(t2, eris.vvvv).char
    imds.woooo = imds.ftmp.create_dataset('woooo', (nocc,nocc,nocc,nocc), dtype)
    imds.wovvo = imds.ftmp.create_dataset('wovvo', (nocc,nvir,nvir,nocc), dtype)
    imds.woVVo = imds.ftmp.create_dataset('woVVo', (nocc,nvir,nvir,nocc), dtype)
    imds.woovo = imds.ftmp.create_dataset('woovo', (nocc,nocc,nvir,nocc), dtype)
    imds.wvvvo = imds.ftmp.create_dataset('wvvvo', (nvir,nvir,nvir,nocc), dtype)
    imds.woooo[:] = woooo
    imds.wovvo[:] = wOVvo*2 + woVVo
    imds.woVVo[:] = woVVo
    imds.woovo[:] = woovo
    imds.wvvvo[:] = wvvvo
    imds.v1 = v1
    imds.v2 = v2
    imds.w3 = w3
    imds.v4 = v4
    imds.ftmp.flush()
    return imds
def make_tau(t2, t1a, t1b, fac=1, out=None):
    """Forward all arguments, unchanged and in order, to ``_ccsd.make_tau``.

    Args:
        t2, t1a, t1b: arrays handed straight to ``_ccsd.make_tau``.
        fac: scaling argument passed through (defaults to 1).
        out: optional output buffer passed through (defaults to None).

    Returns:
        Whatever ``_ccsd.make_tau`` returns.
    """
    forwarded = (t2, t1a, t1b, fac, out)
    return _ccsd.make_tau(*forwarded)
def gamma2_incore(mycc, t1, t2, l1, l2):
    """Build the RDM intermediates entirely in memory.

    Work buffers are reused aggressively (re-bound under new names and
    overwritten in place), so the statement order below is significant.

    Args:
        mycc: object providing ``stdout`` and ``verbose`` (used for the logger).
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2: array indexed as ``[i, j, a, b]`` -- presumably of shape
            ``(nocc, nocc, nvir, nvir)``; confirm against the caller.
        l1: array used with the same index pattern as ``t1``.
        l2: array used with the same index pattern as ``t2``.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        ``dvvvv`` has shape ``(nvir_pair, nvir_pair)`` with
        ``nvir_pair = nvir*(nvir+1)//2``; ``dvvov`` is
        ``dovvv.transpose(2,3,0,1)``.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    time1 = time.clock(), time.time()
    #:theta = make_theta(t2)
    #:mOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2)
    #:mOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2)
    #:mOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
    l2a = numpy.empty((nocc,nvir,nocc,nvir))
    t2a = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        l2a[i] = l2[i].transpose(2,0,1)
        t2a[i] = t2[i].transpose(2,0,1)
    mOvOv = lib.dot(t2a.reshape(-1,nov), l2a.reshape(-1,nov).T).reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    mOVov = lib.dot(t2a.reshape(-1,nov), l2a.reshape(-1,nov).T, -1).reshape(nocc,nvir,nocc,nvir)
    # Reuse the t2a buffer for theta (2*t2 - transposed t2 in this layout).
    theta = t2a
    for i in range(nocc):
        l2a[i] = l2[i].transpose(1,0,2)
        theta[i] *= 2
        theta[i] -= t2[i].transpose(2,0,1)
    lib.dot(theta.reshape(-1,nov), l2a.reshape(nov,-1).T, 1, mOVov.reshape(nov,-1), 1)
    theta = l2a = t2a = None
    moo =(numpy.einsum('jdld->jl', mOvOv) * 2
        + numpy.einsum('jdld->jl', mOVov))
    mvv =(numpy.einsum('lbld->bd', mOvOv) * 2
        + numpy.einsum('lbld->bd', mOVov))
    mia =(numpy.einsum('kc,ikac->ia', l1, t2) * 2
        - numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo*.5

    # NOTE(review): this zero array is rebound a few lines below before it is
    # ever read.
    gooov = numpy.zeros((nocc,nocc,nocc,nvir))
    tau = _ccsd.make_tau(t2, t1, t1)
    #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
    goooo = lib.dot(tau.reshape(-1,nvir**2), l2.reshape(-1,nvir**2).T, .5)
    goooo = goooo.reshape(-1,nocc,nocc,nocc)
    doooo = _cp(make_theta(goooo).transpose(0,2,1,3))

    #:gooov -= numpy.einsum('ib,kjab->jkia', l1, tau)
    #:gooov -= numpy.einsum('kjab,ib->jkia', l2, t1)
    #:gooov += numpy.einsum('jkil,la->jkia', goooo, t1*2)
    gooov = lib.dot(_cp(tau.reshape(-1,nvir)), l1.T, -1)
    lib.dot(_cp(l2.reshape(-1,nvir)), t1.T, -1, gooov, 1)
    gooov = gooov.reshape(nocc,nocc,nvir,nocc)
    tmp = numpy.einsum('ji,ka->jkia', moo*-.5, t1)
    tmp += gooov.transpose(1,0,3,2)
    gooov, tmp = tmp, None
    lib.dot(goooo.reshape(-1,nocc), t1, 2, gooov.reshape(-1,nvir), 1)

    goovv = numpy.einsum('ia,jb->ijab', mia, t1)
    for i in range(nocc):
        goovv[i] += .5 * l2 [i]
        goovv[i] += .5 * tau[i]
    #:goovv -= numpy.einsum('jk,kiba->jiba', mij, tau)
    lib.dot(mij, tau.reshape(nocc,-1), -1, goovv.reshape(nocc,-1), 1)
    #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2)
    #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
    lib.dot(t2.reshape(-1,nvir), mab, -1, goovv.reshape(-1,nvir), 1)
    lib.dot(tau.reshape(-1,nvir), mvv.T, -.5, goovv.reshape(-1,nvir), 1)
    tau = None

    #:gooov += numpy.einsum('jaic,kc->jkia', mOvOv, t1)
    #:gooov -= numpy.einsum('kaic,jc->jkia', mOVov, t1)
    tmp = lib.dot(mOvOv.reshape(-1,nvir), t1.T).reshape(nocc,-1,nocc,nocc)
    gooov += tmp.transpose(0,3,2,1)
    lib.dot(t1, mOVov.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1), 0)
    gooov -= tmp.reshape(nocc,nocc,nvir,nocc).transpose(0,1,3,2)
    dooov = gooov.transpose(0,2,1,3)*2 - gooov.transpose(1,2,0,3)
    gooov = None
    #:tmp = numpy.einsum('ikac,jc->jaik', l2, t1)
    #:gOvVo -= numpy.einsum('jaik,kb->jabi', tmp, t1)
    #:gOvvO = numpy.einsum('jaki,kb->jabi', tmp, t1) + mOvOv.transpose(0,3,1,2)
    # The tmp buffer from above is reused as the output of the next lib.dot.
    tmp = tmp.reshape(nocc,nocc,nocc,nvir)
    lib.dot(t1, l2.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
    gOvVo = numpy.einsum('ia,jb->jabi', l1, t1)
    gOvvO = numpy.empty((nocc,nvir,nvir,nocc))
    for i in range(nocc):
        gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0,2,1).reshape(-1,nocc)),
                            t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvVo[i] += mOVov[i].transpose(2,0,1)
        gOvvO[i] = lib.dot(tmp[i].reshape(nocc,-1).T,
                           t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvvO[i] += mOvOv[i].transpose(2,0,1)
    tmp = None

    dovvo = numpy.empty((nocc,nvir,nvir,nocc))
    doovv = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        tmp = gOvVo[i] * 2 + gOvvO[i]
        dovvo[i] = tmp.transpose(1,0,2)
        tmp = gOvvO[i] * -2 - gOvVo[i]
        doovv[i] = tmp.transpose(2,0,1)
    gOvvO = gOvVo = None

    tau2 = _ccsd.make_tau(t2, t1, t1)
    #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
    lib.dot(goooo.reshape(nocc*nocc,-1), tau2.reshape(-1,nvir**2), 1,
            goovv.reshape(-1,nvir**2), 1)
    tau2 += numpy.einsum('ia,jb->ijab', t1, t1)
    # NOTE(review): tau2p is a reshape of tau2, so the loop below reads from
    # and writes to the same buffer; this presumably relies on NumPy handling
    # overlapping assignment safely -- confirm for the supported versions.
    tau2p = tau2.reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        tau2p[i] = tau2[i].transpose(2,0,1)
    tau2, tau2p = tau2p.reshape(nov,-1), None
    #:goovv += numpy.einsum('ibld,jlda->ijab', mOvOv, tau2) * .5
    #:goovv -= numpy.einsum('iald,jldb->ijab', mOVov, tau2) * .5
    tmp = lib.dot(mOvOv.reshape(-1,nov), tau2.T, .5).reshape(nocc,nvir,-1,nvir)
    for i in range(nocc):
        tmp[i] = goovv[i].transpose(1,0,2) + tmp[i].transpose(2,1,0)
    # goovv now lives in the tmp buffer with its middle two axes swapped
    # relative to before.
    goovv, tmp = tmp, None
    lib.dot(mOVov.reshape(-1,nov), tau2.T, -.5, goovv.reshape(nov,-1), 1)

    #:goovv += numpy.einsum('iald,jlbd->ijab', mOVov*2+mOvOv, t2) * .5
    t2a, tau2 = tau2.reshape(nocc,nvir,nocc,nvir), None
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    tmp = mOVov*2
    tmp += mOvOv
    lib.dot(tmp.reshape(-1,nov), t2a.reshape(nov,-1), .5, goovv.reshape(nov,-1), 1)
    t2a = tmp = None
    for i in range(nocc):
        goovv[i] = goovv[i] * 2 - goovv[i].transpose(2,1,0)
    dovov = goovv
    goooo = goovv = None

    #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
    mOVov = lib.transpose(mOVov.reshape(nov,-1))
    gvovv = lib.dot(mOVov.reshape(nocc,-1).T, t1).reshape(nvir,nocc,nvir,nvir)
    mOVov = None
    tmp = numpy.einsum('ja,jb->ab', l1, t1)
    #:gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
    #:gvovv += numpy.einsum('ba,ic->aibc', mvv, t1*.5)
    for i in range(nvir):
        gvovv[i] += numpy.einsum('b,ic->icb', tmp[i], t1)
        gvovv[i] += numpy.einsum('b,ic->icb', mvv[:,i]*.5, t1)
        gvovv[i] = gvovv[i].transpose(0,2,1)

    #:gvovv += numpy.einsum('ja,jibc->aibc', l1, t2)
    #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1)
    #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
    mOvOv = lib.transpose(mOvOv.reshape(nov,-1))
    lib.dot(mOvOv.reshape(nocc,-1).T, t1, -1, gvovv.reshape(-1,nvir), 1)
    mOvOv = None
    lib.dot(l1.T, t2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)
    lib.dot(t1.T, l2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)

    tmp = numpy.empty((nocc,nvir,nvir))
    for i in range(nvir):
        #:gvovv*2 - gvovv.transpose(0,1,3,2)
        gvovv[i] = _ccsd.make_021(gvovv[i], gvovv[i], 2, -1, out=tmp)

    #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2, t2)*.5
    #:jabc = numpy.einsum('ijab,ic->jabc', l2, t1) * .5
    #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
    #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
    tau = _ccsd.make_tau(t2, t1, t1)
    theta = make_theta(tau)
    tau = None
    # gtmp holds the contraction with the first virtual pair of l2 packed by
    # lib.pack_tril.
    l2tmp = lib.pack_tril(l2.reshape(-1,nvir,nvir))
    gtmp = lib.dot(l2tmp.T, theta.reshape(nocc**2,-1), .5).reshape(-1,nvir,nvir)
    l2tmp = theta = None
    nvir_pair = nvir * (nvir+1) //2
    tmp = numpy.empty((nvir,nvir,nvir))
    tmp1 = numpy.empty((nvir,nvir,nvir))
    tmptril = numpy.empty((nvir,nvir_pair))
    diag_idx = numpy.arange(nvir)
    diag_idx = diag_idx*(diag_idx+1)//2 + diag_idx

    dvvvv = numpy.empty((nvir_pair,nvir_pair))
    dovvv = numpy.empty((nocc,nvir,nvir,nvir))
    # dvvov = (gvovv*2 - gvovv.transpose(0,1,3,2)).transpose(0,2,1,3)
    # dovvv = dvvov.transpose(2,3,0,1)
    p0 = 0  # packed-row offset of virtual index i, i.e. i*(i+1)//2
    for i in range(nvir):
        # Unpack row i of gtmp: rows (i, j<=i) directly, rows (j>i, i)
        # transposed.
        tmp[:i+1] = gtmp[p0:p0+i+1]
        for j in range(i+1, nvir):
            tmp[j] = gtmp[j*(j+1)//2+i].T
        lib.dot(t1, tmp.reshape(nvir,-1), -2, gvovv[i].reshape(nocc,-1), 1)
        dovvv[:,:,i] = gvovv[i].transpose(0,2,1)

        #:gvvvv[i] = (tmp*2-tmp.transpose(0,2,1)).transpose(1,0,2)
        #:gvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(1,0,3,2))
        tmp1[:] = tmp.transpose(1,0,2)
        _ccsd.precontract(tmp1, diag_fac=2, out=tmptril)
        # Rows p0..p0+i-1 were filled (unscaled) by earlier iterations in the
        # inner loop below; complete and scale them now.
        dvvvv[p0:p0+i] += tmptril[:i]
        dvvvv[p0:p0+i] *= .25
        dvvvv[i*(i+1)//2+i] = tmptril[i] * .5
        for j in range(i+1, nvir):
            dvvvv[j*(j+1)//2+i] = tmptril[j]
        p0 += i + 1
    gtmp = tmp = tmp1 = tmptril = gvovv = None
    dvvov = dovvv.transpose(2,3,0,1)
    return (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)
def update_amps(mycc, t1, t2, l1, l2, eris=None, saved=None):
    """Perform one update of the ``l1``/``l2`` amplitudes.

    The residual-like quantities ``l1new``/``l2new`` are accumulated term by
    term, divided by orbital-energy differences taken from the diagonal of
    ``eris.fock`` and added to the incoming ``l1``/``l2``.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``add_wvvVV``.
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2: array indexed as ``[i, j, a, b]`` -- presumably of shape
            ``(nocc, nocc, nvir, nvir)``; confirm against the caller.
        l1: current amplitudes, used with the same index pattern as ``t1``.
        l2: current amplitudes, used with the same index pattern as ``t2``.
        eris: integral container; ``fock``, ``ooov``, ``ovvv``, ``ovov`` and
            ``oovv`` are read.  Although it defaults to None, it is
            dereferenced unconditionally.
        saved: precomputed intermediates (``w1``..``w4``, ``wooov``,
            ``wovvv``, ``wOvOv``, ``wOVov``, ``woooo``); built with
            ``make_intermediates`` when None.

    Returns:
        Tuple ``(l1new, l2new)`` of updated amplitudes.
    """
    if saved is None:
        saved = make_intermediates(mycc, t1, t2, eris)
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    # Only the occupied-virtual Fock block is needed here.  (The original also
    # defined an unused ``foo`` and an ``fvv`` that was mistakenly sliced as
    # the occupied-occupied block.)
    fov = eris.fock[:nocc,nocc:]

    #:mba = numpy.einsum('klca,klcb->ba', l2, t2*2-t2.transpose(0,1,3,2))
    #:mij = numpy.einsum('ikcd,jkcd->ij', l2, t2*2-t2.transpose(0,1,3,2))
    #:theta = t2*2 - t2.transpose(0,1,3,2)
    theta = _ccsd.make_0132(t2, t2, 2, -1)
    mba = lib.dot(theta.reshape(-1,nvir).T, l2.reshape(-1,nvir))
    mij = lib.dot(l2.reshape(nocc,-1), theta.reshape(nocc,-1).T)
    theta = None
    mba1 = numpy.einsum('jc,jb->bc', l1, t1) + mba
    mij1 = numpy.einsum('kb,jb->kj', l1, t1) + mij
    mia1 =(t1 + numpy.einsum('kc,jkbc->jb', l1, t2) * 2
         - numpy.einsum('kc,jkcb->jb', l1, t2)
         - reduce(numpy.dot, (t1, l1.T, t1))
         - numpy.einsum('bd,jd->jb', mba, t1)
         - numpy.einsum('lj,lb->jb', mij, t1))

    # add_wvvVV returns one (nvir, nvir) block per packed pair i >= j; unpack
    # it into the full l2new array.
    tmp = mycc.add_wvvVV(numpy.zeros_like(l1), l2, eris)
    l2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            tmp1 = tmp[ij] * .5  # *.5 because of l2+l2.transpose(1,0,3,2) later
            l2new[i,j] = tmp1
            l2new[j,i] = tmp1.T
            ij += 1
        l2new[i,i] = tmp[ij] * .5
        ij += 1
    l1new =(numpy.einsum('ijab,jb->ia', l2new, t1) * 4
          - numpy.einsum('jiab,jb->ia', l2new, t1) * 2)
    tmp = tmp1 = None

    l1new += fov
    l1new += numpy.einsum('ib,ba->ia', l1, saved.w1)
    l1new -= numpy.einsum('ja,ij->ia', l1, saved.w2)
    l1new -= numpy.einsum('ik,ka->ia', mij, saved.w4)
    l1new -= numpy.einsum('ca,ic->ia', mba, saved.w4)
    l1new += numpy.einsum('ijab,bj->ia', l2, saved.w3) * 2
    l1new -= numpy.einsum('ijba,bj->ia', l2, saved.w3)

    l2new += numpy.einsum('ia,jb->ijab', l1, saved.w4)
    #:l2new += numpy.einsum('jibc,ca->jiba', l2, saved.w1)
    #:l2new -= numpy.einsum('kiba,jk->jiba', l2, saved.w2)
    lib.dot(l2.reshape(-1,nvir), saved.w1, 1, l2new.reshape(-1,nvir), 1)
    lib.dot(saved.w2, l2.reshape(nocc,-1),-1, l2new.reshape(nocc,-1), 1)

    eris_ooov = _cp(eris.ooov)
    l1new -= numpy.einsum('jkia,kj->ia', eris_ooov, mij1) * 2
    l1new += numpy.einsum('ikja,kj->ia', eris_ooov, mij1)
    #:l2new -= numpy.einsum('ka,kijb->jiba', l1, eris_ooov)
    lib.dot(_cp(eris_ooov.transpose(0,2,1,3).reshape(nocc,-1)).T, l1, -1,
            l2new.reshape(-1,nvir), 1)
    eris_ooov = None

    tau = _ccsd.make_tau(t2, t1, t1)
    #:l2tau = numpy.einsum('ijcd,klcd->ijkl', l2, tau)
    l2tau = lib.dot(l2.reshape(nocc**2,-1),
                    tau.reshape(nocc**2,-1).T).reshape((nocc,)*4)
    tau = None
    l2t1 = numpy.einsum('ijcd,kc->ijkd', l2, t1)

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape(nov,-1))
    eris_ovvv = eris_ovvv.reshape(nocc,nvir,nvir,nvir)

    l1new += numpy.einsum('iabc,bc->ia', eris_ovvv, mba1) * 2
    l1new -= numpy.einsum('ibca,bc->ia', eris_ovvv, mba1)
    #:l2new += numpy.einsum('ic,jbac->jiba', l1, eris_ovvv)
    tmp = lib.dot(l1, eris_ovvv.reshape(-1,nvir).T).reshape(nocc,-1,nvir,nvir)
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(0,2,1)
    #:m4 = numpy.einsum('ijkd,kadb->ijab', l2t1, eris_ovvv)
    # m4 reuses the tmp buffer; it stays alive after ``tmp`` is released.
    m4 = tmp
    lib.dot(_cp(l2t1.reshape(nocc*nocc,-1)),
            _cp(eris_ovvv.transpose(0,2,1,3).reshape(-1,nvir**2)),
            1, m4.reshape(nocc*nocc,-1))
    l2new -= m4
    l1new -= numpy.einsum('ijab,jb->ia', m4, t1) * 2
    l1new -= numpy.einsum('ijab,ia->jb', m4, t1) * 2
    l1new += numpy.einsum('jiab,jb->ia', m4, t1)
    l1new += numpy.einsum('jiab,ia->jb', m4, t1)
    eris_ovvv = tmp = None

    eris_ovov = _cp(eris.ovov)
    l1new += numpy.einsum('jb,iajb->ia', l1, eris_ovov) * 2
    #:l2new -= numpy.einsum('jbic,ca->jiba', eris_ovov, mba1)
    #:l2new -= numpy.einsum('kajb,ik->ijab', eris_ovov, mij1)
    tmp = lib.dot(eris_ovov.reshape(-1,nvir), mba1).reshape(nocc,nvir,nocc,nvir)
    lib.dot(mij1, eris_ovov.reshape(nocc,-1), 1, tmp.reshape(nocc,-1), 1)
    tmp_oovv = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        tmp_oovv[i] = eris_ovov[i].transpose(1,0,2) * .5
        l2new[i] += tmp_oovv[i]
        l2new[i] -= tmp[i].transpose(1,0,2)
    tmp = None
    l1new += numpy.einsum('iajb,jb->ia', eris_ovov, mia1) * 2
    l1new -= numpy.einsum('ibja,jb->ia', eris_ovov, mia1)
    #:m4 = numpy.einsum('kalb,ijkl->ijab', eris_ovov, l2tau)
    lib.dot(l2tau.reshape(nocc*nocc,-1), tmp_oovv.reshape(-1,nvir**2), 1,
            m4.reshape(nocc**2,-1))
    l2new += m4
    l1new += numpy.einsum('ijab,jb->ia', m4, t1) * 4
    l1new -= numpy.einsum('ijba,jb->ia', m4, t1) * 2
    eris_ovov = m4 = tmp_oovv = None

    eris_oovv = _cp(eris.oovv)
    l1new -= numpy.einsum('jb,ijba->ia', l1, eris_oovv)
    eris_oovv = None

    saved_wooov = _cp(saved.wooov)
    #:l1new -= numpy.einsum('jkca,ijkc->ia', l2, saved_wooov)
    l1new -= lib.dot(saved_wooov.reshape(nocc,-1), l2.reshape(-1,nvir))
    saved_wovvv = _cp(saved.wovvv)
    #:l1new += numpy.einsum('kibc,kabc->ia', l2, saved_wovvv)
    for j in range(nocc):
        l1new += lib.dot(l2[j].reshape(nocc,-1),
                         saved_wovvv[j].reshape(nvir,-1).T)
    saved_wooov = saved_wovvv = None

    saved_wOvOv = _cp(saved.wOvOv)
    tmp_ovov = _cp(saved.wOVov) * 2
    tmp_ovov += saved_wOvOv
    #:tmp = l2.transpose(0,2,1,3) - l2.transpose(0,3,1,2)*.5
    #:l2new += numpy.einsum('kcia,kcjb->jiba', tmp, tmp_ovov)
    tmp = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        tmp[i] = l2[i].transpose(2,0,1)*-.5
        tmp[i] += l2[i].transpose(1,0,2)
    tmp = lib.dot(tmp_ovov.reshape(-1,nov),
                  tmp.reshape(nov,-1)).reshape(-1,nvir,nocc,nvir)
    #:tmp = numpy.einsum('jkca,ibkc->ijab', l2, saved_wOvOv)
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(1,0,2)
        # tmp_ovov is no longer needed; reuse it for the transposed l2.
        tmp_ovov[i] = l2[i].transpose(2,0,1)
    lib.dot(saved_wOvOv.reshape(-1,nov), tmp_ovov.reshape(nov,-1), 1,
            tmp.reshape(nov,-1))
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(1,2,0)
        l2new[i] += tmp[i].transpose(1,0,2) * .5
    saved_wOvOv = tmp = tmp_ovov = None

    saved_woooo = _cp(saved.woooo)
    #:m3 = numpy.einsum('klab,ijkl->ijab', l2, saved_woooo)
    m3 = lib.dot(saved_woooo.reshape(-1,nocc**2), l2.reshape(nocc**2,-1),
                 .5).reshape(-1,nocc,nvir,nvir)
    l2new += m3
    l1new += numpy.einsum('ijab,jb->ia', m3, t1) * 4
    l1new -= numpy.einsum('ijba,jb->ia', m3, t1) * 2
    saved_woooo = m3 = None

    mo_e = eris.fock.diagonal()
    eia = lib.direct_sum('i-j->ij', mo_e[:nocc], mo_e[nocc:])
    l1new /= eia
    l1new += l1

#    l2new = l2new + l2new.transpose(1,0,3,2)
#    l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
#    l2new += l2
    # Pair-by-pair version of the three commented lines above, done in place.
    for i in range(nocc):
        for j in range(i):
            dab = lib.direct_sum('a+b->ab', eia[i], eia[j])
            tmp = (l2new[i,j]+l2new[j,i].T) / dab + l2[i,j]
            l2new[i,j] = tmp
            l2new[j,i] = tmp.T
        dab = lib.direct_sum('a+b->ab', eia[i], eia[i])
        l2new[i,i] = (l2new[i,i]+l2new[i,i].T)/dab + l2[i,i]

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new
def make_intermediates(mycc, t1, t2, eris):
    """Build the t1/t2-dependent intermediates used by the lambda update.

    Everything is held in memory as numpy arrays.

    Args:
        mycc: not read by this function; kept so the call signature stays
            ``make_intermediates(mycc, t1, t2, eris)``.
        t1: array of shape (nocc, nvir).
        t2: array of shape (nocc, nocc, nvir, nvir).
        eris: object providing ``fock``, ``ovvv``, ``ovov``, ``ooov``,
            ``ovoo``, ``oovv`` and ``oooo``.  ``ovvv`` is passed through
            ``_ccsd.unpack_tril`` before being reshaped to
            (nocc, nvir, nvir, nvir).

    Returns:
        A ``_Saved`` instance carrying the attributes ``w1`` (nvir, nvir),
        ``w2`` (nocc, nocc), ``w3`` (nvir, nocc), ``w4`` (nocc, nvir),
        ``woooo``, ``wooov``, ``wOVov``, ``wOvOv`` and ``wovvv``.
    """
    nocc, nvir = t1.shape
    nov = nocc * nvir
    foo = eris.fock[:nocc,:nocc]
    fov = eris.fock[:nocc,nocc:]
    fvv = eris.fock[nocc:,nocc:]

    class _Saved(object):
        pass
    saved = _Saved()

    # As we don't have l2 in memory, hold tau temporarily in memory
    w1 = fvv - numpy.einsum('ja,jb->ba', fov, t1)
    w2 = foo + numpy.einsum('ib,jb->ij', fov, t1)
    w3 = _cp(numpy.einsum('kc,jkbc->bj', fov, t2) * 2 + fov.T)
    w3 -= numpy.einsum('kc,kjbc->bj', fov, t2)
    w3 += reduce(numpy.dot, (t1.T, fov, t1.T))
    w4 = fov.copy()

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape(nov,-1))
    eris_ovvv = eris_ovvv.reshape(nocc,nvir,nvir,nvir)

    wovvv = numpy.empty((nocc,nvir,nvir,nvir))
    t2tmp = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        wovvv[i] = eris_ovvv[i].transpose(1,0,2) * 2
        t2tmp[i] = t2[i].transpose(2,0,1)
    #:wovvv += numpy.einsum('jabd,kjdc->kabc', eris_ovvv, t2) * -1.5
    # wovvv currently holds 2*ovvv (transposed), hence the factor -1.5/2.
    tmp = lib.dot(t2tmp.reshape(nov,-1), wovvv.reshape(-1,nvir**2),
                  -1.5/2).reshape(-1,nvir,nvir,nvir)
    # g2ovvv reuses tmp's storage: tmp[i] must be consumed before
    # g2ovvv[i] overwrites it inside the same iteration.
    g2ovvv = tmp
    for i in range(nocc):
        wovvv[i] -= eris_ovvv[i].transpose(1,2,0)
        wovvv[i] += tmp[i].transpose(1,2,0)
        g2ovvv[i] = eris_ovvv[i]*2
        g2ovvv[i] -= eris_ovvv[i].transpose(1,2,0)
    tmp = t2tmp = None

    w1 += numpy.einsum('jcba,jc->ba', eris_ovvv, t1*2)
    w1 -= numpy.einsum('jabc,jc->ba', eris_ovvv, t1)
    #:w3 += numpy.einsum('kdcb,kjdc->bj', eris_ovvv, theta)
    theta = numpy.empty(t2.shape)
    for i in range(nocc):
        theta[i] = t2[i] * 2
        theta[i] -= t2[i].transpose(0,2,1)
        lib.dot(eris_ovvv[i].reshape(-1,nvir).T,
                _cp(theta[i].reshape(nocc,-1)).T, 1, w3, 1)

    # From here on theta is stored with axes 1 and 2 swapped.
    theta = _cp(theta.transpose(0,2,1,3))
    #:vkbca = numpy.einsum('jdca,kbjd->kbca', g2ovvv, theta)
    vkbca = lib.dot(_cp(theta.reshape(nov,-1)),
                    g2ovvv.reshape(-1,nvir*nvir)).reshape(-1,nvir,nvir,nvir)
    for i in range(nocc):
        wovvv[i] += vkbca[i].transpose(2,0,1)
        wovvv[i] -= vkbca[i].transpose(2,1,0) * .5
    # Release the scratch array (the original cleared a misspelled name).
    vkbca = None

    #:wOVov = numpy.einsum('jbcd,kd->jbkc', eris_ovvv, t1)
    #:wOvOv = numpy.einsum('jdcb,kd->jbkc', eris_ovvv, -t1)
    wOVov = lib.dot(eris_ovvv.reshape(-1,nvir),
                    t1.T).reshape(-1,nvir,nvir,nocc).transpose(0,1,3,2).copy()
    for i in range(nocc):
        g2ovvv[i] = eris_ovvv[i].transpose(1,2,0) * 2
    # g2ovvv holds 2*ovvv here, so alpha=.5 restores the plain integrals.
    wOvOv = lib.dot(g2ovvv.reshape(-1,nvir), -t1.T,
                    .5).reshape(-1,nvir,nvir,nocc).transpose(0,1,3,2).copy()
    for i in range(nocc):
        g2ovvv[i] -= eris_ovvv[i].transpose(1,0,2)

    eris_ovov = _cp(_cp(eris.ovov).transpose(0,2,1,3))
    tau = _ccsd.make_tau(t2, t1, t1)
    #:wooov[:,j0:j1] = numpy.einsum('icbd,jkbd->ijkc', g2ovvv, tau)
    #:woooo[:,:,j0:j1] = numpy.einsum('icjd,klcd->ijkl', eris_ovov, tau)
    tmp = lib.dot(g2ovvv.reshape(-1,nvir**2), tau.reshape(-1,nvir**2).T)
    wooov = _cp(tmp.reshape(-1,nvir,nocc,nocc).transpose(0,2,3,1))
    woooo = lib.dot(eris_ovov.reshape(-1,nvir**2),
                    tau.reshape(-1,nvir**2).T).reshape(-1,nocc,nocc,nocc)
    eris_ovov = eris_ovvv = g2ovvv = tau = tmp = None

    eris_ooov = _cp(eris.ooov)
    eris_ovoo = _cp(eris.ovoo)
    #:woooo += numpy.einsum('icjl,kc->ijkl', eris_ovoo, t1)
    #:wOVov += numpy.einsum('jblk,lc->jbkc', eris_ovoo, -t1)
    for i in range(nocc):
        woooo[i] += lib.dot(t1, eris_ovoo[i].reshape(nvir,-1)).reshape((nocc,)*3).transpose(1,0,2)
    lib.dot(eris_ovoo.reshape(-1,nocc), t1, -1, wOVov.reshape(-1,nvir), 1)
    #:wooov -= numpy.einsum('ibjl,lkcb->ijkc', eris_ovoo*1.5, t2)
    t2tmp = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        t2tmp[i] = t2[i].transpose(2,0,1)
    tmp_ooov = _cp(-eris_ooov.transpose(2,0,1,3)).reshape(-1,nov)
    lib.dot(tmp_ooov, t2tmp.reshape(nov,-1), 1.5, wooov.reshape(-1,nov), 1)
    t2tmp = None

    # Reuse the negated, transposed ooov buffer to build 2*ooov - ooov^T.
    g2ooov, tmp_ooov = tmp_ooov.reshape(nocc,nocc,nocc,nvir), None
    g2ooov += eris_ooov * 2
    #:vikjc = numpy.einsum('iklb,jlcb->ikjc', g2ooov, theta)
    vikjc = lib.dot(g2ooov.reshape(-1,nov), theta.reshape(-1,nov).T)
    vikjc = vikjc.reshape(nocc,nocc,nocc,nvir)
    wooov += vikjc.transpose(0,2,1,3)
    wooov -= vikjc*.5
    g2ooov = vikjc = eris_ovoo = None

    w2 += numpy.einsum('ijkb,kb->ij', eris_ooov, t1) * 2
    w2 -= numpy.einsum('kjib,kb->ij', eris_ooov, t1)
    #:w3 -= numpy.einsum('kjlc,klbc->bj', eris_ooov, theta)
    for i in range(nocc):
        lib.dot(_cp(theta[i].transpose(1,2,0)).reshape(-1,nvir).T,
                eris_ooov[i].reshape(nocc,-1).T, -1, w3, 1)
    #:woooo += numpy.einsum('ikjc,lc->ijkl', eris_ooov, t1)
    #:wOvOv += numpy.einsum('jklb,lc->jbkc', eris_ooov, t1)
    woooo += lib.dot(eris_ooov.reshape(-1,nvir),
                     t1.T).reshape((-1,nocc,nocc,nocc)).transpose(0,2,1,3)
    for i in range(nocc):
        lib.dot(_cp(eris_ooov[i].transpose(2,0,1)).reshape(-1,nocc), t1, 1,
                wOvOv[i].reshape(-1,nvir), 1)
        wooov[i] += eris_ooov[i].transpose(1,0,2)*2
        wooov[i] -= eris_ooov[i]
    eris_ooov = theta = None

    eris_ovov = _cp(eris.ovov)
    g2ovov = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        g2ovov[i] = eris_ovov[i].transpose(1,0,2)*2
        g2ovov[i] -= eris_ovov[i].transpose(1,2,0)
    tmpw4 = numpy.einsum('klcd,ld->kc', g2ovov, t1)
    #:w1 -= numpy.einsum('kcja,kjcb->ba', g2ovov, t2)
    w1 -= lib.dot(t2.reshape(-1,nvir).T, g2ovov.reshape(-1,nvir))
    w1 -= numpy.einsum('ja,jb->ba', tmpw4, t1)
    #:w2 += numpy.einsum('ibkc,jkbc->ij', g2ovov, t2)
    w2 += lib.dot(g2ovov.reshape(nocc,-1), t2.reshape(nocc,-1).T)
    w2 += numpy.einsum('ib,jb->ij', tmpw4, t1)
    w3 += reduce(numpy.dot, (t1.T, tmpw4, t1.T))
    w4 += tmpw4

    vOVov = eris_ovov.copy()
    #:vOVov += numpy.einsum('jbld,klcd->jbkc', g2ovov, t2)
    #:vOVov -= numpy.einsum('jbld,kldc->jbkc', eris_ovov, t2)
    lib.dot(_cp(g2ovov.transpose(0,2,1,3)).reshape(-1,nov),
            _cp(t2.transpose(0,2,1,3).reshape(nov,-1).T), 1,
            vOVov.reshape(nov,-1), 1)
    lib.dot(eris_ovov.reshape(-1,nov),
            _cp(t2.transpose(0,3,1,2).reshape(nov,-1).T), -1,
            vOVov.reshape(nov,-1), 1)
    g2ovov = None

    #:tmp = numpy.einsum('jbld,kd->ljbk', eris_ovov, t1)
    #:wOVov -= numpy.einsum('ljbk,lc->jbkc', tmp, t1)
    #:tmp = numpy.einsum('jdlb,kd->ljbk', eris_ovov, t1)
    #:wOvOv += numpy.einsum('ljbk,lc->jbkc', tmp, t1)
    tmp = numpy.empty((nocc,nvir,nocc))
    for j in range(nocc):
        lib.dot(_cp(eris_ovov[j].transpose(1,0,2)).reshape(-1,nvir), t1.T, 1,
                tmp.reshape(-1,nocc))
        lib.dot(tmp.reshape(nocc,-1).T, t1, -1, wOVov[j].reshape(-1,nvir), 1)
        lib.dot(eris_ovov[j].reshape(nvir,-1).T, t1.T, 1, tmp.reshape(-1,nocc))
        lib.dot(tmp.reshape(nocc,-1).T, t1, 1, wOvOv[j].reshape(-1,nvir), 1)
    tmp = None

    #:vOvOv = numpy.einsum('jdlb,kldc->jbkc', eris_ovov, t2)
    ovovtmp = _cp(eris_ovov.transpose(0,3,2,1).reshape(-1,nov))
    vOvOv = numpy.empty((nocc,nvir,nocc,nvir))
    for j in range(nocc):
        lib.dot(t2[j].reshape(-1,nvir).T, ovovtmp.T, 1,
                vOvOv[j].reshape(nvir,-1))
        vOvOv[j] -= eris.oovv[j].transpose(2,0,1)
    ovovtmp = eris_ovov = None
    vOvOv = lib.transpose(vOvOv.reshape(nov,-1)).reshape(nocc,nvir,nocc,nvir)
    wOVov += vOVov
    wOvOv += vOvOv
    saved.wOVov = wOVov
    saved.wOvOv = wOvOv
    # Drop the local references; the arrays stay alive through `saved`.
    wOVov = wOvOv = None

    ov2 = vOVov*2 + vOvOv
    w3 += numpy.einsum('kcjb,kc->bj', ov2, t1)
    #:wooov += numpy.einsum('ibjc,kb->ijkc', ov2, t1)
    #:wovvv -= numpy.einsum('jakb,jc->kabc', ov2, t1)
    for i in range(nocc):
        wooov[i] += lib.dot(t1, ov2[i].reshape(nvir,-1)).reshape(nocc,nocc,nvir).transpose(1,0,2)
    lib.dot(_cp(ov2.transpose(0,2,1,3).reshape(nocc,-1)).T, t1, -1,
            wovvv.reshape(-1,nvir), 1)
    ov2 = None

    ov1 = vOvOv*2 + vOVov
    #:wooov -= numpy.einsum('ibkc,jb->ijkc', ov1, t1)
    #:wovvv += numpy.einsum('jakc,jb->kabc', ov1, t1)
    for i in range(nocc):
        lib.dot(t1, ov1[i].reshape(nvir,-1), -1, wooov[i].reshape(nocc,-1), 1)
    wovvv += lib.dot(_cp(ov1.reshape(nocc,-1)).T,
                     t1).reshape(nvir,-1,nvir,nvir).transpose(1,0,3,2)
    ov1 = None

    woooo += _cp(eris.oooo).transpose(0,2,1,3)
    saved.woooo = woooo
    saved.wooov = wooov
    woooo = wooov = None

    # w3 picks up the finished w1 and w2, so this must follow their updates.
    w3 += numpy.einsum('bc,jc->bj', w1, t1)
    w3 -= numpy.einsum('kj,kb->bj', w2, t1)

    eris_ooov = _cp(eris.ooov)
    g2ooov = eris_ooov * 2
    g2ooov -= eris_ooov.transpose(2,0,1,3)
    #:tmp = numpy.einsum('kjla,jb->kabl', g2ooov, t1)
    #:wovvv = numpy.einsum('kabl,lc->kabc', tmp, t1)
    #:wovvv += numpy.einsum('kjla,jlbc->kabc', g2ooov, t2)
    tmp = lib.dot(g2ooov.reshape(nocc,-1).T,
                  t1).reshape(-1,nocc,nvir,nvir).transpose(0,2,3,1)
    lib.dot(_cp(tmp.reshape(-1,nocc)), t1, 1, wovvv.reshape(-1,nvir), 1)
    tmp = None
    lib.dot(_cp(g2ooov.transpose(0,2,1,3).reshape(nocc**2,-1)).T,
            t2.reshape(nocc**2,-1), 1, wovvv.reshape(nov,-1), 1)
    g2ooov = eris_ooov = vOVov = vOvOv = None

    saved.wovvv = wovvv
    saved.w1 = w1
    saved.w2 = w2
    saved.w3 = w3
    saved.w4 = w4
    return saved
def make_intermediates(mycc, t1, t2, eris):
    """Build the t1/t2-dependent intermediates used by the lambda update.

    Everything is held in memory as numpy arrays.

    Args:
        mycc: not read by this function; kept so the call signature stays
            ``make_intermediates(mycc, t1, t2, eris)``.
        t1: array of shape (nocc, nvir).
        t2: array of shape (nocc, nocc, nvir, nvir).
        eris: object providing ``fock``, ``ovvv``, ``ovov``, ``ooov``,
            ``ovoo``, ``oovv`` and ``oooo``.  ``ovvv`` is passed through
            ``lib.unpack_tril`` before being reshaped to
            (nocc, nvir, nvir, nvir).

    Returns:
        A ``_Saved`` instance carrying the attributes ``w1`` (nvir, nvir),
        ``w2`` (nocc, nocc), ``w3`` (nvir, nocc), ``w4`` (nocc, nvir),
        ``woooo``, ``wooov``, ``wOVov``, ``wOvOv`` and ``wovvv``.
    """
    nocc, nvir = t1.shape
    nov = nocc * nvir
    foo = eris.fock[:nocc, :nocc]
    fov = eris.fock[:nocc, nocc:]
    fvv = eris.fock[nocc:, nocc:]

    class _Saved(object):
        pass

    saved = _Saved()

    # As we don't have l2 in memory, hold tau temporarily in memory
    w1 = fvv - numpy.einsum('ja,jb->ba', fov, t1)
    w2 = foo + numpy.einsum('ib,jb->ij', fov, t1)
    w3 = _cp(numpy.einsum('kc,jkbc->bj', fov, t2) * 2 + fov.T)
    w3 -= numpy.einsum('kc,kjbc->bj', fov, t2)
    w3 += reduce(numpy.dot, (t1.T, fov, t1.T))
    w4 = fov.copy()

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = lib.unpack_tril(eris_ovvv.reshape(nov, -1))
    eris_ovvv = eris_ovvv.reshape(nocc, nvir, nvir, nvir)

    wovvv = numpy.empty((nocc, nvir, nvir, nvir))
    t2tmp = numpy.empty((nocc, nvir, nocc, nvir))
    for i in range(nocc):
        wovvv[i] = eris_ovvv[i].transpose(1, 0, 2) * 2
        t2tmp[i] = t2[i].transpose(2, 0, 1)
    #:wovvv += numpy.einsum('jabd,kjdc->kabc', eris_ovvv, t2) * -1.5
    # wovvv currently holds 2*ovvv (transposed), hence the factor -1.5/2.
    tmp = lib.dot(t2tmp.reshape(nov, -1), wovvv.reshape(-1, nvir**2),
                  -1.5 / 2).reshape(-1, nvir, nvir, nvir)
    # g2ovvv reuses tmp's storage: tmp[i] must be consumed before
    # g2ovvv[i] overwrites it inside the same iteration.
    g2ovvv = tmp
    for i in range(nocc):
        wovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
        wovvv[i] += tmp[i].transpose(1, 2, 0)
        g2ovvv[i] = eris_ovvv[i] * 2
        g2ovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
    tmp = t2tmp = None

    w1 += numpy.einsum('jcba,jc->ba', eris_ovvv, t1 * 2)
    w1 -= numpy.einsum('jabc,jc->ba', eris_ovvv, t1)
    #:w3 += numpy.einsum('kdcb,kjdc->bj', eris_ovvv, theta)
    theta = numpy.empty(t2.shape)
    for i in range(nocc):
        theta[i] = t2[i] * 2
        theta[i] -= t2[i].transpose(0, 2, 1)
        lib.dot(eris_ovvv[i].reshape(-1, nvir).T,
                _cp(theta[i].reshape(nocc, -1)).T, 1, w3, 1)

    # From here on theta is stored with axes 1 and 2 swapped.
    theta = _cp(theta.transpose(0, 2, 1, 3))
    #:vkbca = numpy.einsum('jdca,kbjd->kbca', g2ovvv, theta)
    vkbca = lib.dot(_cp(theta.reshape(nov, -1)),
                    g2ovvv.reshape(-1, nvir * nvir)).reshape(-1, nvir, nvir, nvir)
    for i in range(nocc):
        wovvv[i] += vkbca[i].transpose(2, 0, 1)
        wovvv[i] -= vkbca[i].transpose(2, 1, 0) * .5
    # Release the scratch array (the original cleared a misspelled name).
    vkbca = None

    #:wOVov = numpy.einsum('jbcd,kd->jbkc', eris_ovvv, t1)
    #:wOvOv = numpy.einsum('jdcb,kd->jbkc', eris_ovvv, -t1)
    wOVov = lib.dot(eris_ovvv.reshape(-1, nvir),
                    t1.T).reshape(-1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
    for i in range(nocc):
        g2ovvv[i] = eris_ovvv[i].transpose(1, 2, 0) * 2
    # g2ovvv holds 2*ovvv here, so alpha=.5 restores the plain integrals.
    wOvOv = lib.dot(g2ovvv.reshape(-1, nvir), -t1.T,
                    .5).reshape(-1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
    for i in range(nocc):
        g2ovvv[i] -= eris_ovvv[i].transpose(1, 0, 2)

    eris_ovov = _cp(_cp(eris.ovov).transpose(0, 2, 1, 3))
    tau = _ccsd.make_tau(t2, t1, t1)
    #:wooov[:,j0:j1] = numpy.einsum('icbd,jkbd->ijkc', g2ovvv, tau)
    #:woooo[:,:,j0:j1] = numpy.einsum('icjd,klcd->ijkl', eris_ovov, tau)
    tmp = lib.dot(g2ovvv.reshape(-1, nvir**2), tau.reshape(-1, nvir**2).T)
    wooov = _cp(tmp.reshape(-1, nvir, nocc, nocc).transpose(0, 2, 3, 1))
    woooo = lib.dot(eris_ovov.reshape(-1, nvir**2),
                    tau.reshape(-1, nvir**2).T).reshape(-1, nocc, nocc, nocc)
    eris_ovov = eris_ovvv = g2ovvv = tau = tmp = None

    eris_ooov = _cp(eris.ooov)
    eris_ovoo = _cp(eris.ovoo)
    #:woooo += numpy.einsum('icjl,kc->ijkl', eris_ovoo, t1)
    #:wOVov += numpy.einsum('jblk,lc->jbkc', eris_ovoo, -t1)
    for i in range(nocc):
        woooo[i] += lib.dot(t1, eris_ovoo[i].reshape(nvir, -1)).reshape(
            (nocc, ) * 3).transpose(1, 0, 2)
    lib.dot(eris_ovoo.reshape(-1, nocc), t1, -1, wOVov.reshape(-1, nvir), 1)
    #:wooov -= numpy.einsum('ibjl,lkcb->ijkc', eris_ovoo*1.5, t2)
    t2tmp = numpy.empty((nocc, nvir, nocc, nvir))
    for i in range(nocc):
        t2tmp[i] = t2[i].transpose(2, 0, 1)
    tmp_ooov = _cp(-eris_ooov.transpose(2, 0, 1, 3)).reshape(-1, nov)
    lib.dot(tmp_ooov, t2tmp.reshape(nov, -1), 1.5, wooov.reshape(-1, nov), 1)
    t2tmp = None

    # Reuse the negated, transposed ooov buffer to build 2*ooov - ooov^T.
    g2ooov, tmp_ooov = tmp_ooov.reshape(nocc, nocc, nocc, nvir), None
    g2ooov += eris_ooov * 2
    #:vikjc = numpy.einsum('iklb,jlcb->ikjc', g2ooov, theta)
    vikjc = lib.dot(g2ooov.reshape(-1, nov), theta.reshape(-1, nov).T)
    vikjc = vikjc.reshape(nocc, nocc, nocc, nvir)
    wooov += vikjc.transpose(0, 2, 1, 3)
    wooov -= vikjc * .5
    g2ooov = vikjc = eris_ovoo = None

    w2 += numpy.einsum('ijkb,kb->ij', eris_ooov, t1) * 2
    w2 -= numpy.einsum('kjib,kb->ij', eris_ooov, t1)
    #:w3 -= numpy.einsum('kjlc,klbc->bj', eris_ooov, theta)
    for i in range(nocc):
        lib.dot(
            _cp(theta[i].transpose(1, 2, 0)).reshape(-1, nvir).T,
            eris_ooov[i].reshape(nocc, -1).T, -1, w3, 1)
    #:woooo += numpy.einsum('ikjc,lc->ijkl', eris_ooov, t1)
    #:wOvOv += numpy.einsum('jklb,lc->jbkc', eris_ooov, t1)
    woooo += lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape(
        (-1, nocc, nocc, nocc)).transpose(0, 2, 1, 3)
    for i in range(nocc):
        lib.dot(
            _cp(eris_ooov[i].transpose(2, 0, 1)).reshape(-1, nocc), t1, 1,
            wOvOv[i].reshape(-1, nvir), 1)
        wooov[i] += eris_ooov[i].transpose(1, 0, 2) * 2
        wooov[i] -= eris_ooov[i]
    eris_ooov = theta = None

    eris_ovov = _cp(eris.ovov)
    g2ovov = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        g2ovov[i] = eris_ovov[i].transpose(1, 0, 2) * 2
        g2ovov[i] -= eris_ovov[i].transpose(1, 2, 0)
    tmpw4 = numpy.einsum('klcd,ld->kc', g2ovov, t1)
    #:w1 -= numpy.einsum('kcja,kjcb->ba', g2ovov, t2)
    w1 -= lib.dot(t2.reshape(-1, nvir).T, g2ovov.reshape(-1, nvir))
    w1 -= numpy.einsum('ja,jb->ba', tmpw4, t1)
    #:w2 += numpy.einsum('ibkc,jkbc->ij', g2ovov, t2)
    w2 += lib.dot(g2ovov.reshape(nocc, -1), t2.reshape(nocc, -1).T)
    w2 += numpy.einsum('ib,jb->ij', tmpw4, t1)
    w3 += reduce(numpy.dot, (t1.T, tmpw4, t1.T))
    w4 += tmpw4

    vOVov = eris_ovov.copy()
    #:vOVov += numpy.einsum('jbld,klcd->jbkc', g2ovov, t2)
    #:vOVov -= numpy.einsum('jbld,kldc->jbkc', eris_ovov, t2)
    lib.dot(
        _cp(g2ovov.transpose(0, 2, 1, 3)).reshape(-1, nov),
        _cp(t2.transpose(0, 2, 1, 3).reshape(nov, -1).T), 1,
        vOVov.reshape(nov, -1), 1)
    lib.dot(eris_ovov.reshape(-1, nov),
            _cp(t2.transpose(0, 3, 1, 2).reshape(nov, -1).T), -1,
            vOVov.reshape(nov, -1), 1)
    g2ovov = None

    #:tmp = numpy.einsum('jbld,kd->ljbk', eris_ovov, t1)
    #:wOVov -= numpy.einsum('ljbk,lc->jbkc', tmp, t1)
    #:tmp = numpy.einsum('jdlb,kd->ljbk', eris_ovov, t1)
    #:wOvOv += numpy.einsum('ljbk,lc->jbkc', tmp, t1)
    tmp = numpy.empty((nocc, nvir, nocc))
    for j in range(nocc):
        lib.dot(
            _cp(eris_ovov[j].transpose(1, 0, 2)).reshape(-1, nvir), t1.T, 1,
            tmp.reshape(-1, nocc))
        lib.dot(tmp.reshape(nocc, -1).T, t1, -1, wOVov[j].reshape(-1, nvir), 1)
        lib.dot(eris_ovov[j].reshape(nvir, -1).T, t1.T, 1,
                tmp.reshape(-1, nocc))
        lib.dot(tmp.reshape(nocc, -1).T, t1, 1, wOvOv[j].reshape(-1, nvir), 1)
    tmp = None

    #:vOvOv = numpy.einsum('jdlb,kldc->jbkc', eris_ovov, t2)
    ovovtmp = _cp(eris_ovov.transpose(0, 3, 2, 1).reshape(-1, nov))
    vOvOv = numpy.empty((nocc, nvir, nocc, nvir))
    for j in range(nocc):
        lib.dot(t2[j].reshape(-1, nvir).T, ovovtmp.T, 1,
                vOvOv[j].reshape(nvir, -1))
        vOvOv[j] -= eris.oovv[j].transpose(2, 0, 1)
    ovovtmp = eris_ovov = None
    vOvOv = lib.transpose(vOvOv.reshape(nov, -1)).reshape(nocc, nvir, nocc, nvir)
    wOVov += vOVov
    wOvOv += vOvOv
    saved.wOVov = wOVov
    saved.wOvOv = wOvOv
    # Drop the local references; the arrays stay alive through `saved`.
    wOVov = wOvOv = None

    ov2 = vOVov * 2 + vOvOv
    w3 += numpy.einsum('kcjb,kc->bj', ov2, t1)
    #:wooov += numpy.einsum('ibjc,kb->ijkc', ov2, t1)
    #:wovvv -= numpy.einsum('jakb,jc->kabc', ov2, t1)
    for i in range(nocc):
        wooov[i] += lib.dot(t1, ov2[i].reshape(nvir, -1)).reshape(
            nocc, nocc, nvir).transpose(1, 0, 2)
    lib.dot(
        _cp(ov2.transpose(0, 2, 1, 3).reshape(nocc, -1)).T, t1, -1,
        wovvv.reshape(-1, nvir), 1)
    ov2 = None

    ov1 = vOvOv * 2 + vOVov
    #:wooov -= numpy.einsum('ibkc,jb->ijkc', ov1, t1)
    #:wovvv += numpy.einsum('jakc,jb->kabc', ov1, t1)
    for i in range(nocc):
        lib.dot(t1, ov1[i].reshape(nvir, -1), -1,
                wooov[i].reshape(nocc, -1), 1)
    wovvv += lib.dot(_cp(ov1.reshape(nocc, -1)).T,
                     t1).reshape(nvir, -1, nvir, nvir).transpose(1, 0, 3, 2)
    ov1 = None

    woooo += _cp(eris.oooo).transpose(0, 2, 1, 3)
    saved.woooo = woooo
    saved.wooov = wooov
    woooo = wooov = None

    # w3 picks up the finished w1 and w2, so this must follow their updates.
    w3 += numpy.einsum('bc,jc->bj', w1, t1)
    w3 -= numpy.einsum('kj,kb->bj', w2, t1)

    eris_ooov = _cp(eris.ooov)
    g2ooov = eris_ooov * 2
    g2ooov -= eris_ooov.transpose(2, 0, 1, 3)
    #:tmp = numpy.einsum('kjla,jb->kabl', g2ooov, t1)
    #:wovvv = numpy.einsum('kabl,lc->kabc', tmp, t1)
    #:wovvv += numpy.einsum('kjla,jlbc->kabc', g2ooov, t2)
    tmp = lib.dot(g2ooov.reshape(nocc, -1).T,
                  t1).reshape(-1, nocc, nvir, nvir).transpose(0, 2, 3, 1)
    lib.dot(_cp(tmp.reshape(-1, nocc)), t1, 1, wovvv.reshape(-1, nvir), 1)
    tmp = None
    lib.dot(
        _cp(g2ooov.transpose(0, 2, 1, 3).reshape(nocc**2, -1)).T,
        t2.reshape(nocc**2, -1), 1, wovvv.reshape(nov, -1), 1)
    g2ooov = eris_ooov = vOVov = vOvOv = None

    saved.wovvv = wovvv
    saved.w1 = w1
    saved.w2 = w2
    saved.w3 = w3
    saved.w4 = w4
    return saved
def update_amps(mycc, t1, t2, l1, l2, eris=None, saved=None):
    """Perform one update step of the lambda amplitudes ``l1`` and ``l2``.

    Args:
        mycc: CC object; provides ``stdout``, ``verbose`` and ``add_wvvVV``.
        t1: array of shape (nocc, nvir).
        t2: array of shape (nocc, nocc, nvir, nvir).
        l1: current lambda singles, same shape as ``t1``.
        l2: current lambda doubles, same shape as ``t2``.
        eris: object providing ``fock``, ``ooov``, ``ovvv``, ``ovov`` and
            ``oovv``.  Despite the ``None`` default it is dereferenced
            unconditionally, so callers must supply it.
        saved: intermediates as returned by ``make_intermediates``; built
            here when omitted.

    Returns:
        Tuple ``(l1new, l2new)`` of updated amplitudes.
    """
    if saved is None:
        saved = make_intermediates(mycc, t1, t2, eris)
    # time.clock() was removed in Python 3.8; keep using it where it still
    # exists and fall back to process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fov = eris.fock[:nocc, nocc:]

    #:mba = numpy.einsum('klca,klcb->ba', l2, t2*2-t2.transpose(0,1,3,2))
    #:mij = numpy.einsum('ikcd,jkcd->ij', l2, t2*2-t2.transpose(0,1,3,2))
    #:theta = t2*2 - t2.transpose(0,1,3,2)
    theta = _ccsd.make_0132(t2, t2, 2, -1)
    mba = lib.dot(theta.reshape(-1, nvir).T, l2.reshape(-1, nvir))
    mij = lib.dot(l2.reshape(nocc, -1), theta.reshape(nocc, -1).T)
    theta = None
    mba1 = numpy.einsum('jc,jb->bc', l1, t1) + mba
    mij1 = numpy.einsum('kb,jb->kj', l1, t1) + mij
    mia1 = (t1 + numpy.einsum('kc,jkbc->jb', l1, t2) * 2 -
            numpy.einsum('kc,jkcb->jb', l1, t2) -
            reduce(numpy.dot, (t1, l1.T, t1)) -
            numpy.einsum('bd,jd->jb', mba, t1) -
            numpy.einsum('lj,lb->jb', mij, t1))

    # add_wvvVV returns one (nvir, nvir) block per pair i >= j; expand it
    # into the full (nocc, nocc, nvir, nvir) array.
    tmp = mycc.add_wvvVV(numpy.zeros_like(l1), l2, eris)
    l2new = numpy.empty((nocc, nocc, nvir, nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            tmp1 = tmp[ij] * .5  # *.5 because of l2+l2.transpose(1,0,3,2) later
            l2new[i, j] = tmp1
            l2new[j, i] = tmp1.T
            ij += 1
        l2new[i, i] = tmp[ij] * .5
        ij += 1
    l1new = (numpy.einsum('ijab,jb->ia', l2new, t1) * 4 -
             numpy.einsum('jiab,jb->ia', l2new, t1) * 2)
    tmp = tmp1 = None

    l1new += fov
    l1new += numpy.einsum('ib,ba->ia', l1, saved.w1)
    l1new -= numpy.einsum('ja,ij->ia', l1, saved.w2)
    l1new -= numpy.einsum('ik,ka->ia', mij, saved.w4)
    l1new -= numpy.einsum('ca,ic->ia', mba, saved.w4)
    l1new += numpy.einsum('ijab,bj->ia', l2, saved.w3) * 2
    l1new -= numpy.einsum('ijba,bj->ia', l2, saved.w3)

    l2new += numpy.einsum('ia,jb->ijab', l1, saved.w4)
    #:l2new += numpy.einsum('jibc,ca->jiba', l2, saved.w1)
    #:l2new -= numpy.einsum('kiba,jk->jiba', l2, saved.w2)
    lib.dot(l2.reshape(-1, nvir), saved.w1, 1, l2new.reshape(-1, nvir), 1)
    lib.dot(saved.w2, l2.reshape(nocc, -1), -1, l2new.reshape(nocc, -1), 1)

    eris_ooov = _cp(eris.ooov)
    l1new -= numpy.einsum('jkia,kj->ia', eris_ooov, mij1) * 2
    l1new += numpy.einsum('ikja,kj->ia', eris_ooov, mij1)
    #:l2new -= numpy.einsum('ka,kijb->jiba', l1, eris_ooov)
    lib.dot(
        _cp(eris_ooov.transpose(0, 2, 1, 3).reshape(nocc, -1)).T, l1, -1,
        l2new.reshape(-1, nvir), 1)
    eris_ooov = None

    tau = _ccsd.make_tau(t2, t1, t1)
    #:l2tau = numpy.einsum('ijcd,klcd->ijkl', l2, tau)
    l2tau = lib.dot(l2.reshape(nocc**2, -1),
                    tau.reshape(nocc**2, -1).T).reshape((nocc, ) * 4)
    tau = None
    l2t1 = numpy.einsum('ijcd,kc->ijkd', l2, t1)

    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = lib.unpack_tril(eris_ovvv.reshape(nov, -1))
    eris_ovvv = eris_ovvv.reshape(nocc, nvir, nvir, nvir)

    l1new += numpy.einsum('iabc,bc->ia', eris_ovvv, mba1) * 2
    l1new -= numpy.einsum('ibca,bc->ia', eris_ovvv, mba1)
    #:l2new += numpy.einsum('ic,jbac->jiba', l1, eris_ovvv)
    tmp = lib.dot(l1, eris_ovvv.reshape(-1, nvir).T).reshape(nocc, -1, nvir, nvir)
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(0, 2, 1)
    #:m4 = numpy.einsum('ijkd,kadb->ijab', l2t1, eris_ovvv)
    # m4 reuses tmp's storage as the output buffer of the next product; the
    # same buffer is overwritten again for the l2tau contraction below.
    m4 = tmp
    lib.dot(_cp(l2t1.reshape(nocc * nocc, -1)),
            _cp(eris_ovvv.transpose(0, 2, 1, 3).reshape(-1, nvir**2)), 1,
            m4.reshape(nocc * nocc, -1))
    l2new -= m4
    l1new -= numpy.einsum('ijab,jb->ia', m4, t1) * 2
    l1new -= numpy.einsum('ijab,ia->jb', m4, t1) * 2
    l1new += numpy.einsum('jiab,jb->ia', m4, t1)
    l1new += numpy.einsum('jiab,ia->jb', m4, t1)
    eris_ovvv = tmp = None

    eris_ovov = _cp(eris.ovov)
    l1new += numpy.einsum('jb,iajb->ia', l1, eris_ovov) * 2
    #:l2new -= numpy.einsum('jbic,ca->jiba', eris_ovov, mba1)
    #:l2new -= numpy.einsum('kajb,ik->ijab', eris_ovov, mij1)
    tmp = lib.dot(eris_ovov.reshape(-1, nvir), mba1).reshape(nocc, nvir, nocc, nvir)
    lib.dot(mij1, eris_ovov.reshape(nocc, -1), 1, tmp.reshape(nocc, -1), 1)
    tmp_oovv = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        tmp_oovv[i] = eris_ovov[i].transpose(1, 0, 2) * .5
        l2new[i] += tmp_oovv[i]
        l2new[i] -= tmp[i].transpose(1, 0, 2)
    tmp = None
    l1new += numpy.einsum('iajb,jb->ia', eris_ovov, mia1) * 2
    l1new -= numpy.einsum('ibja,jb->ia', eris_ovov, mia1)
    #:m4 = numpy.einsum('kalb,ijkl->ijab', eris_ovov, l2tau)
    lib.dot(l2tau.reshape(nocc * nocc, -1), tmp_oovv.reshape(-1, nvir**2), 1,
            m4.reshape(nocc**2, -1))
    l2new += m4
    l1new += numpy.einsum('ijab,jb->ia', m4, t1) * 4
    l1new -= numpy.einsum('ijba,jb->ia', m4, t1) * 2
    eris_ovov = m4 = tmp_oovv = None

    eris_oovv = _cp(eris.oovv)
    l1new -= numpy.einsum('jb,ijba->ia', l1, eris_oovv)
    eris_oovv = None

    saved_wooov = _cp(saved.wooov)
    #:l1new -= numpy.einsum('jkca,ijkc->ia', l2, saved_wooov)
    l1new -= lib.dot(saved_wooov.reshape(nocc, -1), l2.reshape(-1, nvir))
    saved_wovvv = _cp(saved.wovvv)
    #:l1new += numpy.einsum('kibc,kabc->ia', l2, saved_wovvv)
    for j in range(nocc):
        l1new += lib.dot(l2[j].reshape(nocc, -1),
                         saved_wovvv[j].reshape(nvir, -1).T)
    saved_wooov = saved_wovvv = None

    saved_wOvOv = _cp(saved.wOvOv)
    tmp_ovov = _cp(saved.wOVov) * 2
    tmp_ovov += saved_wOvOv
    #:tmp = l2.transpose(0,2,1,3) - l2.transpose(0,3,1,2)*.5
    #:l2new += numpy.einsum('kcia,kcjb->jiba', tmp, tmp_ovov)
    tmp = numpy.empty((nocc, nvir, nocc, nvir))
    for i in range(nocc):
        tmp[i] = l2[i].transpose(2, 0, 1) * -.5
        tmp[i] += l2[i].transpose(1, 0, 2)
    tmp = lib.dot(tmp_ovov.reshape(-1, nov),
                  tmp.reshape(nov, -1)).reshape(-1, nvir, nocc, nvir)
    #:tmp = numpy.einsum('jkca,ibkc->ijab', l2, saved_wOvOv)
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(1, 0, 2)
        # tmp_ovov is recycled as scratch for the transposed l2.
        tmp_ovov[i] = l2[i].transpose(2, 0, 1)
    lib.dot(saved_wOvOv.reshape(-1, nov), tmp_ovov.reshape(nov, -1), 1,
            tmp.reshape(nov, -1))
    for i in range(nocc):
        l2new[i] += tmp[i].transpose(1, 2, 0)
        l2new[i] += tmp[i].transpose(1, 0, 2) * .5
    saved_wOvOv = tmp = tmp_ovov = None

    saved_woooo = _cp(saved.woooo)
    #:m3 = numpy.einsum('klab,ijkl->ijab', l2, saved_woooo)
    m3 = lib.dot(saved_woooo.reshape(-1, nocc**2), l2.reshape(nocc**2, -1),
                 .5).reshape(-1, nocc, nvir, nvir)
    l2new += m3
    l1new += numpy.einsum('ijab,jb->ia', m3, t1) * 4
    l1new -= numpy.einsum('ijba,jb->ia', m3, t1) * 2
    saved_woooo = m3 = None

    mo_e = eris.fock.diagonal()
    eia = lib.direct_sum('i-j->ij', mo_e[:nocc], mo_e[nocc:])
    l1new /= eia
    l1new += l1

    # Pair-by-pair, in-place form of:
    #    l2new = l2new + l2new.transpose(1,0,3,2)
    #    l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    #    l2new += l2
    for i in range(nocc):
        for j in range(i):
            dab = lib.direct_sum('a+b->ab', eia[i], eia[j])
            tmp = (l2new[i, j] + l2new[j, i].T) / dab + l2[i, j]
            l2new[i, j] = tmp
            l2new[j, i] = tmp.T
        dab = lib.direct_sum('a+b->ab', eia[i], eia[i])
        l2new[i, i] = (l2new[i, i] + l2new[i, i].T) / dab + l2[i, i]

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new
def update_lambda(mycc, t1, t2, l1, l2, eris=None, imds=None):
    """Perform one update step of the lambda amplitudes ``l1`` and ``l2``.

    The orbital-energy diagonal (with ``mycc.level_shift`` added to the
    virtual part) is subtracted from ``imds.w1`` / ``imds.w2`` before they
    are contracted, and the result is divided by the matching energy
    denominators at the end.

    Args:
        mycc: CC object; provides ``stdout``, ``verbose``, ``level_shift``,
            ``max_memory`` and ``_add_vvvv``.
        t1: array of shape (nocc, nvir).
        t2: array of shape (nocc, nocc, nvir, nvir).
        l1: current lambda singles, same shape as ``t1``.
        l2: current lambda doubles, same shape as ``t2``.
        eris: object providing ``fock``, ``mo_energy``, ``ovoo``, ``oovv``,
            ``ovvo`` and ``get_ovvv``.  Despite the ``None`` default it is
            dereferenced unconditionally, so callers must supply it.
        imds: intermediates exposing ``w1``..``w4``, ``woooo``, ``wvooo``,
            ``wvvov``, ``wvOOv`` and ``wVOov``; built with
            ``make_intermediates`` when omitted.

    Returns:
        Tuple ``(l1new, l2new)`` of updated amplitudes.
    """
    if imds is None:
        imds = make_intermediates(mycc, t1, t2, eris)
    # time.clock() was removed in Python 3.8; keep using it where it still
    # exists and fall back to process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)

    nocc, nvir = t1.shape
    fov = eris.fock[:nocc,nocc:]
    mo_e_o = eris.mo_energy[:nocc]
    mo_e_v = eris.mo_energy[nocc:] + mycc.level_shift

    theta = t2*2 - t2.transpose(0,1,3,2)
    mba = lib.einsum('klca,klcb->ba', l2, theta)
    mij = lib.einsum('ikcd,jkcd->ij', l2, theta)
    theta = None
    mba1 = numpy.einsum('jc,jb->bc', l1, t1) + mba
    mij1 = numpy.einsum('kb,jb->kj', l1, t1) + mij
    mia1 = t1 + numpy.einsum('kc,jkbc->jb', l1, t2) * 2
    mia1 -= numpy.einsum('kc,jkcb->jb', l1, t2)
    mia1 -= reduce(numpy.dot, (t1, l1.T, t1))
    mia1 -= numpy.einsum('bd,jd->jb', mba, t1)
    mia1 -= numpy.einsum('lj,lb->jb', mij, t1)

    l2new = mycc._add_vvvv(None, l2, eris, with_ovvv=False, t2sym='jiba')
    l1new = numpy.einsum('ijab,jb->ia', l2new, t1) * 2
    l1new -= numpy.einsum('jiab,jb->ia', l2new, t1)
    l2new *= .5  # *.5 because of l2+l2.transpose(1,0,3,2) in the end

    w1 = imds.w1 - numpy.diag(mo_e_v)
    w2 = imds.w2 - numpy.diag(mo_e_o)

    l1new += fov
    l1new += numpy.einsum('ib,ba->ia', l1, w1)
    l1new -= numpy.einsum('ja,ij->ia', l1, w2)
    l1new -= numpy.einsum('ik,ka->ia', mij, imds.w4)
    l1new -= numpy.einsum('ca,ic->ia', mba, imds.w4)
    l1new += numpy.einsum('ijab,bj->ia', l2, imds.w3) * 2
    l1new -= numpy.einsum('ijba,bj->ia', l2, imds.w3)

    l2new += numpy.einsum('ia,jb->ijab', l1, imds.w4)
    l2new += lib.einsum('jibc,ca->jiba', l2, w1)
    l2new -= lib.einsum('jk,kiba->jiba', w2, l2)

    eris_ovoo = _cp(eris.ovoo)
    l1new -= numpy.einsum('iajk,kj->ia', eris_ovoo, mij1) * 2
    l1new += numpy.einsum('jaik,kj->ia', eris_ovoo, mij1)
    l2new -= lib.einsum('jbki,ka->jiba', eris_ovoo, l1)
    eris_ovoo = None

    tau = _ccsd.make_tau(t2, t1, t1)
    l2tau = lib.einsum('ijcd,klcd->ijkl', l2, tau)
    tau = None
    l2t1 = lib.einsum('jidc,kc->ijkd', l2, t1)

    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    unit = nocc*nvir**2*5
    blksize = min(nocc, max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit)))
    # NOTE(review): the message reports a split of nocc, but the loop below
    # tiles the virtual index (0..nvir) with this block size -- confirm
    # which one is intended.
    log.debug1('block size = %d, nocc = %d is divided into %d blocks',
               blksize, nocc, int((nocc+blksize-1)/blksize))

    l1new -= numpy.einsum('jb,jiab->ia', l1, _cp(eris.oovv))
    # Each pass handles the virtual slice [p0:p1].  Terms written to
    # l1new[:,p0:p1] / l2new[:,:,p0:p1] carry the sliced index as output;
    # the others contract over it.
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_ovvv = eris.get_ovvv(slice(None), slice(p0,p1))

        l1new[:,p0:p1] += numpy.einsum('iabc,bc->ia', eris_ovvv, mba1) * 2
        l1new -= numpy.einsum('ibca,bc->ia', eris_ovvv, mba1[p0:p1])
        l2new[:,:,p0:p1] += lib.einsum('jbac,ic->jiba', eris_ovvv, l1)
        m4 = lib.einsum('ijkd,kadb->ijab', l2t1, eris_ovvv)
        l2new[:,:,p0:p1] -= m4
        l1new[:,p0:p1] -= numpy.einsum('ijab,jb->ia', m4, t1) * 2
        l1new -= numpy.einsum('ijab,ia->jb', m4, t1[:,p0:p1]) * 2
        l1new[:,p0:p1] += numpy.einsum('jiab,jb->ia', m4, t1)
        l1new += numpy.einsum('jiab,ia->jb', m4, t1[:,p0:p1])
        eris_ovvv = m4 = None

        eris_voov = _cp(eris.ovvo[:,p0:p1].transpose(1,0,3,2))
        l1new[:,p0:p1] += numpy.einsum('jb,aijb->ia', l1, eris_voov) * 2
        l2new[:,:,p0:p1] += eris_voov.transpose(1,2,0,3) * .5
        l2new[:,:,p0:p1] -= lib.einsum('bjic,ca->jiba', eris_voov, mba1)
        l2new[:,:,p0:p1] -= lib.einsum('bjka,ik->jiba', eris_voov, mij1)
        l1new[:,p0:p1] += numpy.einsum('aijb,jb->ia', eris_voov, mia1) * 2
        l1new -= numpy.einsum('bija,jb->ia', eris_voov, mia1[:,p0:p1])
        m4 = lib.einsum('ijkl,aklb->ijab', l2tau, eris_voov)
        l2new[:,:,p0:p1] += m4 * .5
        l1new[:,p0:p1] += numpy.einsum('ijab,jb->ia', m4, t1) * 2
        l1new -= numpy.einsum('ijba,jb->ia', m4, t1[:,p0:p1])

        saved_wvooo = _cp(imds.wvooo[p0:p1])
        l1new -= lib.einsum('ckij,jkca->ia', saved_wvooo, l2[:,:,p0:p1])
        saved_wvovv = _cp(imds.wvvov[p0:p1])
        # Watch out memory usage here, due to the l2 transpose
        l1new[:,p0:p1] += lib.einsum('abkc,kibc->ia', saved_wvovv, l2)
        saved_wvooo = saved_wvovv = None

        saved_wvOOv = _cp(imds.wvOOv[p0:p1])
        tmp_voov = _cp(imds.wVOov[p0:p1]) * 2
        tmp_voov += saved_wvOOv
        tmp = l2.transpose(0,2,1,3) - l2.transpose(0,3,1,2)*.5
        l2new[:,:,p0:p1] += lib.einsum('iakc,bjkc->jiba', tmp, tmp_voov)
        tmp = None

        tmp = lib.einsum('jkca,bikc->jiba', l2, saved_wvOOv)
        l2new[:,:,p0:p1] += tmp
        l2new[:,:,p0:p1] += tmp.transpose(1,0,2,3) * .5
        saved_wvOOv = tmp = None

    saved_woooo = _cp(imds.woooo)
    m3 = lib.einsum('ijkl,klab->ijab', saved_woooo, l2)
    l2new += m3 * .5
    l1new += numpy.einsum('ijab,jb->ia', m3, t1) * 2
    l1new -= numpy.einsum('ijba,jb->ia', m3, t1)
    saved_woooo = m3 = None

    eia = lib.direct_sum('i-a->ia', mo_e_o, mo_e_v)
    l1new /= eia

    # Row-by-row, in-place form of:
    #    l2new = l2new + l2new.transpose(1,0,3,2)
    #    l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    for i in range(nocc):
        if i > 0:
            l2new[i,:i] += l2new[:i,i].transpose(0,2,1)
            l2new[i,:i] /= lib.direct_sum('a,jb->jab', eia[i], eia[:i])
            l2new[:i,i] = l2new[i,:i].transpose(0,2,1)
        l2new[i,i] = l2new[i,i] + l2new[i,i].T
        l2new[i,i] /= lib.direct_sum('a,b->ab', eia[i], eia[i])

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new
def make_intermediates(mycc, t1, t2, eris):
    """Build the intermediates consumed by the lambda-amplitude update.

    The two-index intermediates ``w1``..``w4`` are accumulated in memory.
    The four-index intermediates ``woooo``, ``wooov``, ``wOVov``, ``wOvOv``
    and ``wovvv`` are computed block by block over the occupied index and
    written to datasets of a temporary HDF5 file.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2: amplitudes indexed as ``t2[i]`` and reshaped as
            ``(nocc**2, nvir**2)`` -- presumably (nocc, nocc, nvir, nvir).
        eris: object providing ``fock`` and the integral blocks ``ovvv``
            (unpacked with ``lib.unpack_tril``), ``ovov``, ``ooov``,
            ``ovoo``, ``oovv`` and ``oooo``.

    Returns:
        An object with attributes ``w1``, ``w2``, ``w3``, ``w4`` (arrays),
        ``woooo``, ``wooov``, ``wOVov``, ``wOvOv``, ``wovvv`` (HDF5
        datasets), plus ``ftmp`` (the HDF5 file) and ``_tmpfile``.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    foo = eris.fock[:nocc, :nocc]
    fov = eris.fock[:nocc, nocc:]
    fvv = eris.fock[nocc:, nocc:]

    class _Saved:
        pass

    saved = _Saved()
    saved._tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit write mode: h5py >= 3 opens files read-only by default, which
    # cannot create datasets in the (empty) temporary file.
    saved.ftmp = ftmp = h5py.File(saved._tmpfile.name, "w")

    def __del__():
        ftmp.close()

    # NOTE(review): a __del__ set on an instance is presumably never invoked
    # by the interpreter (special methods are looked up on the type); kept
    # unchanged from the original.
    saved.ftmp.__del__ = __del__
    saved.woooo = saved.ftmp.create_dataset("woooo", (nocc, nocc, nocc, nocc), "f8")
    saved.wooov = saved.ftmp.create_dataset("wooov", (nocc, nocc, nocc, nvir), "f8")
    saved.wOVov = saved.ftmp.create_dataset("wOVov", (nocc, nvir, nocc, nvir), "f8")
    saved.wOvOv = saved.ftmp.create_dataset("wOvOv", (nocc, nvir, nocc, nvir), "f8")
    saved.wovvv = saved.ftmp.create_dataset("wovvv", (nocc, nvir, nvir, nvir), "f8")

    # As we don't have l2 in memory, hold tau temporarily in memory
    w1 = fvv - numpy.einsum("ja,jb->ba", fov, t1)
    w2 = foo + numpy.einsum("ib,jb->ij", fov, t1)
    w3 = numpy.einsum("kc,jkbc->bj", fov, t2) * 2 + fov.T
    w3 -= numpy.einsum("kc,kjbc->bj", fov, t2)
    w3 += reduce(numpy.dot, (t1.T, fov, t1.T))
    w4 = fov.copy()

    # Swap file holding the "2vOvOv/<istep>" and "2vovOV/<istep>" blocks that
    # pass 1 writes and pass 2 reads back.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, "w")

    # NOTE(review): time.clock() was removed in Python 3.8; kept because the
    # rest of the file uses the same timer convention.
    time1 = time.clock(), time.time()
    unit = max(
        nocc * nvir ** 2 * 4 + nvir ** 3 * 2,
        nvir ** 3 * 3 + nocc * nvir ** 2,
        nocc * nvir ** 2 * 6 + nocc ** 2 * nvir + nocc ** 3 + nocc ** 2 * nvir,
    )
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(ccsd.BLKMIN, int(max_memory * 0.95e6 / 8 / unit))
    log.debug1(
        "ccsd lambda make_intermediates: block size = %d, nocc = %d in %d blocks",
        blksize,
        nocc,
        int((nocc + blksize - 1) // blksize),
    )

    # ---- pass 1: everything except wovvv, one occupied block at a time ----
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = lib.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        w1 += numpy.einsum("jcba,jc->ba", eris_ovvv, t1[p0:p1] * 2)
        w1 -= numpy.einsum("jabc,jc->ba", eris_ovvv, t1[p0:p1])
        #:w3 += numpy.einsum('kdcb,kjdc->bj', eris_ovvv, theta)
        for i in range(p1 - p0):
            theta = t2[p0 + i] * 2
            theta -= t2[p0 + i].transpose(0, 2, 1)
            w3 += lib.dot(eris_ovvv[i].reshape(-1, nvir).T, _cp(theta.reshape(nocc, -1)).T)
        theta = None
        #:wOVov = numpy.einsum('jbcd,kd->jbkc', eris_ovvv, t1)
        #:wOvOv = numpy.einsum('jdcb,kd->jbkc', eris_ovvv, -t1)
        wOVov = lib.dot(eris_ovvv.reshape(-1, nvir), t1.T).reshape(-1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
        g2ovvv = _cp(eris_ovvv.transpose(0, 2, 3, 1))
        wOvOv = lib.dot(g2ovvv.reshape(-1, nvir), -t1.T).reshape(-1, nvir, nvir, nocc).transpose(0, 1, 3, 2).copy()
        for i in range(p1 - p0):
            g2ovvv[i] *= 2
            g2ovvv[i] -= eris_ovvv[i].transpose(1, 0, 2)
        wooov = numpy.empty((p1 - p0, nocc, nocc, nvir))
        woooo = numpy.empty((p1 - p0, nocc, nocc, nocc))
        eris_ovov = _cp(_cp(eris.ovov[p0:p1]).transpose(0, 2, 1, 3))
        for j0, j1 in prange(0, nocc, blksize):
            tau = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:wooov[:,j0:j1] = numpy.einsum('icbd,jkbd->ijkc', g2ovvv, tau)
            #:woooo[:,:,j0:j1] = numpy.einsum('icjd,klcd->ijkl', eris_ovov, tau)
            tmp = lib.dot(g2ovvv.reshape(-1, nvir ** 2), tau.reshape(-1, nvir ** 2).T)
            wooov[:, j0:j1] = tmp.reshape(-1, nvir, j1 - j0, nocc).transpose(0, 2, 3, 1)
            woooo[:, :, j0:j1] = lib.dot(eris_ovov.reshape(-1, nvir ** 2), tau.reshape(-1, nvir ** 2).T).reshape(
                -1, nocc, j1 - j0, nocc
            )
        eris_ovov = eris_ovvv = g2ovvv = tau = tmp = None
        # ==== mem usage nocc*nvir**2*2 + nocc**2*nvir + nocc**3 + nvir**3*2 + nocc*nvir**2*2

        eris_ooov = _cp(eris.ooov[p0:p1])
        w2[p0:p1] += numpy.einsum("ijkb,kb->ij", eris_ooov, t1) * 2
        w2 -= numpy.einsum("kjib,kb->ij", eris_ooov, t1[p0:p1])
        #:w3 -= numpy.einsum('kjlc,klbc->bj', eris_ooov, theta)
        for i in range(p1 - p0):
            theta = t2[p0 + i].transpose(0, 2, 1) * 2
            theta -= t2[p0 + i]
            w3 -= lib.dot(theta.reshape(-1, nvir).T, eris_ooov[i].reshape(nocc, -1).T)
        theta = None
        #:woooo += numpy.einsum('ikjc,lc->ijkl', eris_ooov, t1)
        #:wOvOv += numpy.einsum('jklb,lc->jbkc', eris_ooov, t1)
        woooo += lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape((-1, nocc, nocc, nocc)).transpose(0, 2, 1, 3)
        for i in range(p1 - p0):
            lib.dot(_cp(eris_ooov[i].transpose(2, 0, 1)).reshape(-1, nocc), t1, 1, wOvOv[i].reshape(-1, nvir), 1)
            wooov[i] += eris_ooov[i].transpose(1, 0, 2) * 2
            wooov[i] -= eris_ooov[i]

        eris_ovoo = _cp(eris.ovoo[p0:p1])
        #:woooo += numpy.einsum('icjl,kc->ijkl', eris_ovoo, t1)
        #:wOVov += numpy.einsum('jbkl,lc->jbkc', eris_ovoo, -t1)
        for i in range(p1 - p0):
            woooo[i] += lib.dot(t1, eris_ovoo[i].reshape(nvir, -1)).reshape((nocc,) * 3).transpose(1, 0, 2)
        lib.dot(eris_ovoo.reshape(-1, nocc), t1, -1, wOVov.reshape(-1, nvir), 1)
        #:wooov -= numpy.einsum('iblj,klbc->ijkc', eris_ovoo*1.5, t2)
        tmp_ovoo = _cp(-eris_ovoo.transpose(0, 2, 3, 1)).reshape(-1, nov)
        for j in range(nocc):
            wooov[:, :, j] += lib.dot(tmp_ovoo, t2[j].reshape(-1, nvir), 1.5).reshape(-1, nocc, nvir)
        #:g2ooov = eris_ooov * 2 - eris_ovoo.transpose(0,3,2,1)
        g2ooov, tmp_ovoo = tmp_ovoo.reshape(p1 - p0, nocc, nocc, nvir), None
        g2ooov += eris_ooov * 2
        thetabuf = numpy.empty((blksize, nvir, nocc, nvir))
        vikjc = numpy.empty((p1 - p0, nocc, blksize, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            theta = thetabuf[: j1 - j0]
            for i in range(j1 - j0):
                theta[i] = t2[j0 + i].transpose(1, 0, 2) * 2 - t2[j0 + i].transpose(2, 0, 1)
            #:vikjc = numpy.einsum('iklb,jlcb->ikjc', g2ooov, theta)
            if j1 == j0 + blksize:
                # Full-size block: write straight into the preallocated buffer.
                lib.dot(
                    g2ooov.reshape(-1, nov), _cp(theta.reshape(-1, nov)).T, 1, vikjc.reshape((p1 - p0) * nocc, -1), 0
                )
            else:
                # Short trailing block: the buffer shape does not fit.
                vikjc = lib.dot(g2ooov.reshape(-1, nov), _cp(theta.reshape(-1, nov)).T)
                vikjc = vikjc.reshape(p1 - p0, nocc, j1 - j0, nvir)
            wooov[:, j0:j1, :] += vikjc.transpose(0, 2, 1, 3)
            wooov[:, :, j0:j1] -= vikjc * 0.5
        eris_ooov = eris_ovoo = g2ooov = vikjc = theta = thetabuf = None
        # ==== mem usage nocc*nvir**2*3 + nocc**2*nvir + nocc**3 + nocc*nvir**2 + nocc**2*nvir*3

        eris_ovov = _cp(eris.ovov[p0:p1])
        g2ovov = eris_ovov * 2
        g2ovov -= eris_ovov.transpose(0, 3, 2, 1)
        tmpw4 = numpy.einsum("kcld,ld->kc", g2ovov, t1)
        #:w1 -= numpy.einsum('kcja,kjcb->ba', g2ovov, t2[p0:p1])
        w1 -= lib.dot(t2[p0:p1].reshape(-1, nvir).T, _cp(g2ovov.transpose(0, 2, 1, 3).reshape(-1, nvir)))
        w1 -= numpy.einsum("ja,jb->ba", tmpw4, t1[p0:p1])
        #:w2[p0:p1] += numpy.einsum('ibkc,jkbc->ij', g2ovov, t2)
        w2[p0:p1] += lib.dot(_cp(g2ovov.transpose(0, 2, 1, 3)).reshape(p1 - p0, -1), t2.reshape(nocc, -1).T)
        w2[p0:p1] += numpy.einsum("ib,jb->ij", tmpw4, t1)
        w3 += reduce(numpy.dot, (t1[p0:p1].T, tmpw4, t1.T))
        w4[p0:p1] += tmpw4
        vOVov = numpy.empty((nocc, nvir, p1 - p0, nvir))
        #:vOVov += numpy.einsum('jbld,klcd->kcjb', g2ovov, t2)
        #:vOVov -= numpy.einsum('jbld,kldc->kcjb', eris_ovov, t2)
        for j in range(nocc):
            lib.dot(
                _cp(t2[j].transpose(1, 0, 2)).reshape(-1, nov), g2ovov.reshape(-1, nov).T, 1, vOVov[j].reshape(nvir, -1)
            )
            lib.dot(t2[j].reshape(nov, -1).T, eris_ovov.reshape(-1, nov).T, -1, vOVov[j].reshape(nvir, -1), 1)
        vOVov = lib.transpose(vOVov.reshape(nov, -1)).reshape(p1 - p0, nvir, nocc, nvir)
        vOVov += eris_ovov
        g2ovov = tmp = tmpw4 = None
        # ==== mem usage nocc*nvir**2*4 + nocc**2*nvir + nocc**3 + nocc*nvir**2

        #:tmp = numpy.einsum('jbld,kd->jlbk', eris_ovov, t1)
        #:wOVov -= numpy.einsum('jlbk,lc->jbkc', tmp, t1)
        #:tmp = numpy.einsum('jdlb,kd->jlbk', eris_ovov, t1)
        #:wOvOv += numpy.einsum('jlbk,lc->jbkc', tmp, t1)
        tmp = numpy.empty((nocc, nvir, nocc))
        for j in range(p1 - p0):
            lib.dot(_cp(eris_ovov[j].transpose(1, 0, 2)).reshape(-1, nvir), t1.T, 1, tmp.reshape(-1, nocc))
            lib.dot(tmp.reshape(nocc, -1).T, t1, -1, wOVov[j].reshape(-1, nvir), 1)
            lib.dot(eris_ovov[j].reshape(nvir, -1).T, t1.T, 1, tmp.reshape(-1, nocc))
            lib.dot(tmp.reshape(nocc, -1).T, t1, 1, wOvOv[j].reshape(-1, nvir), 1)
        tmp = None

        #:vOvOv = numpy.einsum('jdlb,kldc->kcjb', eris_ovov, t2)
        ovovtmp = _cp(eris_ovov.transpose(0, 3, 2, 1)).reshape(-1, nov)
        vOvOv = numpy.empty((nocc, nvir, p1 - p0, nvir))
        for j in range(nocc):
            lib.dot(t2[j].reshape(-1, nvir).T, ovovtmp.T, 1, vOvOv[j].reshape(nvir, -1))
        ovovtmp = eris_ovov = None
        vOvOv = lib.transpose(vOvOv.reshape(nov, -1)).reshape(p1 - p0, nvir, nocc, nvir)
        vOvOv -= _cp(eris.oovv[p0:p1]).transpose(0, 3, 1, 2)
        wOVov += vOVov
        wOvOv += vOvOv
        saved.wOVov[p0:p1] = wOVov
        saved.wOvOv[p0:p1] = wOvOv
        wOVov = wOvOv = None
        # ==== mem usage nocc*nvir**2*6 + nocc**2*nvir + nocc**3 + nocc**2*nvir

        ov1 = vOvOv * 2 + vOVov
        #:wooov -= numpy.einsum('ibkc,jb->ijkc', ov1, t1)
        for i in range(p1 - p0):
            lib.dot(t1, ov1[i].reshape(nvir, -1), -1, wooov[i].reshape(nocc, -1), 1)
        ov1 = lib.transpose(ov1.reshape(-1, nov))
        fswap["2vOvOv/%d" % istep] = ov1.reshape(nocc, nvir, -1, nvir)
        ov1 = None

        ov2 = vOVov * 2 + vOvOv
        w3 += numpy.einsum("kcjb,kc->bj", ov2, t1[p0:p1])
        #:wooov += numpy.einsum('ibjc,kb->ijkc', ov2, t1)
        for i in range(p1 - p0):
            wooov[i] += lib.dot(t1, ov2[i].reshape(nvir, -1)).reshape(nocc, nocc, nvir).transpose(1, 0, 2)
        ov2 = lib.transpose(ov2.reshape(-1, nov))
        fswap["2vovOV/%d" % istep] = ov2.reshape(nocc, nvir, -1, nvir)
        vOVov = vOvOv = None
        ov2 = None
        # ==== mem usage nocc*nvir**2*5 + nocc**2*nvir + nocc**3

        woooo += _cp(eris.oooo[p0:p1]).transpose(0, 2, 1, 3)
        saved.woooo[p0:p1] = woooo
        saved.wooov[p0:p1] = wooov
        woooo = wooov = None
        time1 = log.timer_debug1("pass1 [%d:%d]" % (p0, p1), *time1)

    w3 += numpy.einsum("bc,jc->bj", w1, t1)
    w3 -= numpy.einsum("kj,kb->bj", w2, t1)

    # ---- pass 2: wovvv, reading back the swap blocks written in pass 1 ----
    for p0, p1 in prange(0, nocc, blksize):
        eris_ooov = _cp(eris.ooov[p0:p1])
        g2ooov = eris_ooov * 2
        g2ooov -= eris_ooov.transpose(0, 2, 1, 3)
        #:tmp = numpy.einsum('kjla,jb->kabl', g2ooov, t1)
        #:wovvv = numpy.einsum('kabl,lc->kabc', tmp, t1)
        #:wovvv += numpy.einsum('kjla,jlbc->kabc', g2ooov, t2)
        tmp = (
            lib.dot(_cp(g2ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)).T, t1)
            .reshape(-1, nocc, nvir, nvir)
            .transpose(0, 2, 3, 1)
        )
        wovvv = lib.dot(_cp(tmp.reshape(-1, nocc)), t1).reshape(-1, nvir, nvir, nvir)
        wovvv += lib.dot(_cp(g2ooov.transpose(0, 3, 1, 2).reshape(-1, nocc ** 2)), t2.reshape(nocc ** 2, -1)).reshape(
            -1, nvir, nvir, nvir
        )
        tmp = g2ooov = None
        ov1 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        ov2 = numpy.empty((p1 - p0, nvir, nocc, nvir))
        for istep, (j0, j1) in enumerate(prange(0, nocc, blksize)):
            ov1[:, :, j0:j1] = fswap["2vOvOv/%d" % istep][p0:p1]
            ov2[:, :, j0:j1] = fswap["2vovOV/%d" % istep][p0:p1]
        #:wovvv += numpy.einsum('kcja,jb->kabc', ov1, t1)
        #:wovvv -= numpy.einsum('kbja,jc->kabc', ov2, t1)
        wovvv += (
            lib.dot(_cp(ov1.transpose(0, 1, 3, 2).reshape(-1, nocc)), t1)
            .reshape(-1, nvir, nvir, nvir)
            .transpose(0, 2, 3, 1)
        )
        wovvv -= (
            lib.dot(_cp(ov2.transpose(0, 1, 3, 2).reshape(-1, nocc)), t1)
            .reshape(-1, nvir, nvir, nvir)
            .transpose(0, 2, 1, 3)
        )
        # ==== mem usage nvir**3 + nocc*nvir**2*2
        eris_ooov = ov1 = ov2 = None

        for j0, j1 in prange(0, nocc, blksize):
            eris_ovvv = _cp(eris.ovvv[j0:j1])
            eris_ovvv = lib.unpack_tril(eris_ovvv.reshape((j1 - j0) * nvir, -1))
            eris_ovvv = eris_ovvv.reshape(j1 - j0, nvir, nvir, nvir)
            #:wovvv += numpy.einsum('jabd,kjdc->kabc', eris_ovvv, t2[p0:p1,j0:j1]) * -1.5
            tmp_ovvv = numpy.empty((j1 - j0, nvir, nvir, nvir))
            for i in range(j1 - j0):
                tmp_ovvv[i] = eris_ovvv[i].transpose(1, 0, 2) * 2
            tmp = lib.dot(
                _cp(t2[p0:p1, j0:j1].transpose(0, 3, 1, 2).reshape((p1 - p0) * nvir, -1)),
                tmp_ovvv.reshape(-1, nvir ** 2),
                -1.5 / 2,
            ).reshape(-1, nvir, nvir, nvir)
            wovvv += tmp.transpose(0, 2, 3, 1)
            if p0 == j0:
                for i in range(p1 - p0):
                    tmp_ovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
                    wovvv[i] += tmp_ovvv[i]
            tmp = tmp_ovvv = None

            g2ovvv = numpy.empty((j1 - j0, nvir, nvir, nvir))
            for i in range(j1 - j0):
                g2ovvv[i] = eris_ovvv[i] * 2
                g2ovvv[i] -= eris_ovvv[i].transpose(1, 2, 0)
            # ==== mem usage nvir**3*3
            eris_ovvv = None

            theta = _cp(t2[p0:p1, j0:j1].transpose(0, 2, 1, 3) * 2)
            for i in range(p1 - p0):
                theta[i] -= t2[p0 + i, j0:j1].transpose(2, 0, 1)
            #:vkbca = numpy.einsum('jdca,kbjd->kbca', g2ovvv, theta)
            vkbca = lib.dot(theta.reshape((p1 - p0) * nvir, -1), g2ovvv.reshape(-1, nvir * nvir)).reshape(
                -1, nvir, nvir, nvir
            )
            wovvv += vkbca.transpose(0, 3, 1, 2)
            wovvv -= vkbca.transpose(0, 3, 2, 1) * 0.5
            # ==== mem usage nvir**3*3 + nocc*nvir**2
            # Release the buffers (the original misspelled vkbca as "vkabc",
            # so that array stayed alive until the next iteration).
            g2ovvv = theta = vkbca = None
        saved.wovvv[p0:p1] = wovvv
        time1 = log.timer_debug1("pass2 [%d:%d]" % (p0, p1), *time1)

    fswap.close()
    saved.w1 = w1
    saved.w2 = w2
    saved.w3 = w3
    saved.w4 = w4
    saved.ftmp.flush()
    return saved
def update_lambda(mycc, t1, t2, l1, l2, eris, imds):
    """Return updated lambda amplitudes ``(l1new, l2new)`` for one iteration.

    Contractions of ``l1``/``l2`` with ``t1``/``t2``, the integral blocks in
    ``eris`` and the precomputed intermediates in ``imds`` are accumulated
    into residuals, which are divided by the orbital-energy denominators
    (with ``mycc.level_shift`` added to the virtual diagonal) and added to
    the incoming amplitudes.
    """
    clock0 = time.clock(), time.time()
    logobj = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    l1new = np.zeros_like(l1)

    # Fock-matrix blocks
    fock_oo = eris.fock[:nocc,:nocc]
    fock_ov = eris.fock[:nocc,nocc:]
    fock_vv = eris.fock[nocc:,nocc:]

    tau_t = _ccsd.make_tau(t2, t1, t1)

    # Two-index contractions of l2 (and l1) with the t amplitudes
    t2_theta = t2*2 - t2.transpose(0,1,3,2)
    m_vv = lib.einsum('klca,klcb->ba', l2, t2_theta)
    m_oo = lib.einsum('kicd,kjcd->ij', l2, t2_theta)
    m_vv_l1 = lib.einsum('jc,jb->bc', l1, t1) + m_vv
    m_oo_l1 = lib.einsum('ic,kc->ik', l1, t1) + m_oo

    # m_ijab = einsum('ijab,acbd->ijcd', l2, vvvv)
    #        = einsum('ijab,cadb->ijcd', l2.conj(), vvvv).conj()
    m_ijab = mycc._add_vvvv(None, l2.conj(), eris, with_ovvv=False).conj()
    m_ijab += lib.einsum('klab,ikjl->ijab', l2, imds.woooo)
    m_ijab *= .5

    eri_ovov = np.asarray(eris.ovov)
    l2_tau = np.einsum('ijcd,klcd->ijkl', l2, tau_t)
    m_ijab += np.einsum('kalb,ijkl->ijab', eri_ovov, l2_tau) * .5
    l2_tau = None

    l2new = eri_ovov.transpose(0,2,1,3) * .5
    l2new += lib.einsum('ijac,cb->ijab', l2, imds.v1)
    l2new -= lib.einsum('ikab,jk->ijab', l2, imds.v2)
    l2new -= lib.einsum('ca,icjb->ijab', m_vv_l1, eri_ovov)
    l2new -= lib.einsum('ik,kajb->ijab', m_oo_l1, eri_ovov)

    # From here on eri_ovov holds 2*ovov - ovov.transpose(0,3,2,1)
    eri_ovov = eri_ovov * 2 - eri_ovov.transpose(0,3,2,1)
    l1new -= np.einsum('ik,ka->ia', m_oo, imds.v4)
    l1new -= np.einsum('ca,ic->ia', m_vv, imds.v4)
    l2new += np.einsum('ia,jb->ijab', l1, imds.v4)

    t1_eff = t1 + np.einsum('kc,kjcb->jb', l1, t2_theta)
    t1_eff -= lib.einsum('bd,jd->jb', m_vv_l1, t1)
    t1_eff -= lib.einsum('lj,lb->jb', m_oo, t1)
    l1new += np.einsum('jbia,jb->ia', eri_ovov, t1_eff)
    eri_ovov = t1_eff = None

    # ovvv contributions
    eri_ovvv = np.asarray(eris.get_ovvv())
    l1new += np.einsum('iacb,bc->ia', eri_ovvv, m_vv_l1) * 2
    l1new -= np.einsum('ibca,bc->ia', eri_ovvv, m_vv_l1)
    l2new += lib.einsum('ic,jbca->jiba', l1, eri_ovvv)
    l2_t1 = np.einsum('ijcd,kd->ijck', l2, t1)
    m_ijab -= np.einsum('kbca,ijck->ijab', eri_ovvv, l2_t1)
    l2_t1 = eri_ovvv = None

    l2new += m_ijab
    l1new += np.einsum('ijab,jb->ia', m_ijab, t1) * 2
    l1new += np.einsum('jiba,jb->ia', m_ijab, t1) * 2
    l1new -= np.einsum('ijba,jb->ia', m_ijab, t1)
    l1new -= np.einsum('jiab,jb->ia', m_ijab, t1)

    # ovoo contributions
    eri_ovoo = np.asarray(eris.ovoo)
    l1new -= np.einsum('iajk,kj->ia', eri_ovoo, m_oo_l1) * 2
    l1new += np.einsum('jaik,kj->ia', eri_ovoo, m_oo_l1)
    l2new -= lib.einsum('ka,jbik->ijab', l1, eri_ovoo)
    eri_ovoo = None

    # Contractions with the stored ovvo-type intermediates
    l2_theta = l2*2 - l2.transpose(0,1,3,2)
    l2new += lib.einsum('ikac,jbck->ijab', l2_theta, imds.wovvo) * .5
    l2_woVVo = lib.einsum('ikca,jbck->ijab', l2, imds.woVVo)
    l2new += l2_woVVo * .5
    l2new += l2_woVVo.transpose(1,0,2,3)
    l2_theta = None

    l1new += fock_ov
    l1new += lib.einsum('ib,ba->ia', l1, imds.v1)
    l1new -= lib.einsum('ja,ij->ia', l1, imds.v2)
    l1new += np.einsum('jb,iabj->ia', l1, eris.ovvo) * 2
    l1new -= np.einsum('jb,ijba->ia', l1, eris.oovv)

    l1new -= lib.einsum('ijbc,bacj->ia', l2, imds.wvvvo)
    l1new -= lib.einsum('kjca,ijck->ia', l2, imds.woovo)

    l1new += np.einsum('ijab,bj->ia', l2, imds.w3) * 2
    l1new -= np.einsum('ijba,bj->ia', l2, imds.w3)

    # Divide by the (level-shifted) orbital-energy denominators
    denom_ov = lib.direct_sum('i-j->ij', fock_oo.diagonal(), fock_vv.diagonal() + mycc.level_shift)
    l1new /= denom_ov
    l1new += l1

    # Symmetrize under simultaneous (i<->j, a<->b) exchange before dividing
    l2new = l2new + l2new.transpose(1,0,3,2)
    l2new /= lib.direct_sum('ia+jb->ijab', denom_ov, denom_ov)
    l2new += l2

    clock0 = logobj.timer_debug1('update l1 l2', *clock0)
    return l1new, l2new
def gamma2_outcore(mycc, t1, t2, l1, l2, h5fobj):
    """Compute two-particle density-matrix intermediates out of core.

    The results are written block by block into datasets (and one group)
    created on ``h5fobj``; a temporary HDF5 swap file holds the ``mOvOv`` /
    ``mOVov`` intermediates between the three passes.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        t1: array whose shape is unpacked as ``(nocc, nvir)``.
        t2, l1, l2: amplitude arrays; ``t2``/``l2`` are indexed as ``x[i]``
            and reshaped as ``(nocc**2, nvir**2)`` -- presumably
            (nocc, nocc, nvir, nvir); ``l1`` presumably matches ``t1``.
        h5fobj: writable HDF5 file/group; 'dovov', 'dvvvv', 'doooo',
            'doovv', 'dovvo', 'dooov' datasets and a 'dovvv' group are
            created on it.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, None, dovvv, dooov)`` of
        the corresponding ``h5fobj`` entries (the sixth slot is ``None``).
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    dovov = h5fobj.create_dataset('dovov', (nocc, nvir, nocc, nvir), 'f8')
    dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair, nvir_pair), 'f8')
    doooo = h5fobj.create_dataset('doooo', (nocc, nocc, nocc, nocc), 'f8')
    doovv = h5fobj.create_dataset('doovv', (nocc, nocc, nvir, nvir), 'f8')
    dovvo = h5fobj.create_dataset('dovvo', (nocc, nvir, nvir, nocc), 'f8')
    dooov = h5fobj.create_dataset('dooov', (nocc, nocc, nocc, nvir), 'f8')

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit write mode: h5py >= 3 opens files read-only by default, which
    # cannot create datasets in the (empty) temporary file.
    fswap = h5py.File(_tmpfile.name, 'w')
    mOvOv = fswap.create_dataset('mOvOv', (nocc, nvir, nocc, nvir), 'f8')
    mOVov = fswap.create_dataset('mOVov', (nocc, nvir, nocc, nvir), 'f8')

    moo = numpy.empty((nocc, nocc))
    mvv = numpy.zeros((nvir, nvir))

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 * 5
    blksize = max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1(
        'rdm intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    # NOTE(review): time.clock() was removed in Python 3.8; kept because the
    # rest of the file uses the same timer convention.
    time1 = time.clock(), time.time()

    # ---- pass 1: mOvOv / mOVov blocks and the moo, mvv traces ----
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        #:theta = make_theta(t2[p0:p1])
        #:pOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2[p0:p1])
        #:pOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2[p0:p1])
        #:pOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
        pOvOv = numpy.empty((nocc, p1 - p0, nvir, nvir))
        pOVov = numpy.empty((nocc, p1 - p0, nvir, nvir))
        t2a = numpy.empty((p1 - p0, nvir, nocc, nvir))
        t2b = numpy.empty((p1 - p0, nvir, nocc, nvir))
        theta = make_theta(t2[p0:p1])
        tmp = numpy.empty_like(t2a)
        for i in range(p1 - p0):
            t2a[i] = t2[p0 + i].transpose(2, 0, 1)
            t2b[i] = t2[p0 + i].transpose(1, 0, 2)
            tmp[i] = theta[i].transpose(1, 0, 2)
        t2a = t2a.reshape(-1, nov)
        t2b = t2b.reshape(-1, nov)
        theta, tmp = tmp.reshape(-1, nov), None
        for i in range(nocc):
            pOvOv[i] = lib.dot(t2a, l2[i].reshape(nov, -1)).reshape(-1, nvir, nvir)
            pOVov[i] = lib.dot(t2b, l2[i].reshape(nov, -1), -1).reshape(-1, nvir, nvir)
            pOVov[i] += lib.dot(theta, _cp(l2[i].transpose(0, 2, 1).reshape(
                nov, -1))).reshape(-1, nvir, nvir)
        theta = t2a = t2b = None
        mOvOv[p0:p1] = pOvOv.transpose(1, 2, 0, 3)
        mOVov[p0:p1] = pOVov.transpose(1, 2, 0, 3)
        fswap['mvOvO/%d' % istep] = pOvOv.transpose(3, 1, 2, 0)
        fswap['mvOVo/%d' % istep] = pOVov.transpose(3, 1, 2, 0)
        moo[p0:p1] = (numpy.einsum('ljdd->jl', pOvOv) * 2 +
                      numpy.einsum('ljdd->jl', pOVov))
        mvv += (numpy.einsum('llbd->bd', pOvOv[p0:p1]) * 2 +
                numpy.einsum('llbd->bd', pOVov[p0:p1]))
        pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass1 [%d:%d]' % (p0, p1), *time1)

    mia = (numpy.einsum('kc,ikac->ia', l1, t2) * 2 -
           numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo * .5

    gooov = numpy.einsum('ji,ka->jkia', moo * -.5, t1)

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc**3 + nocc**2 * nvir + nocc * nvir**2 * 6
    blksize = max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1(
        'rdm intermediates pass 2: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))

    # ---- pass 2: doooo, dovvo, doovv, dovov blocks; gooov accumulated ----
    for p0, p1 in prange(0, nocc, blksize):
        tau = _ccsd.make_tau(t2[p0:p1], t1[p0:p1], t1)
        #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
        goooo = lib.dot(tau.reshape(-1, nvir**2), l2.reshape(-1, nvir**2).T, .5)
        goooo = goooo.reshape(-1, nocc, nocc, nocc)
        h5fobj['doooo'][p0:p1] = make_theta(goooo).transpose(0, 2, 1, 3)

        #:gooov[p0:p1] -= numpy.einsum('ib,jkba->jkia', l1, tau)
        #:gooov[p0:p1] -= numpy.einsum('jkba,ib->jkia', l2[p0:p1], t1)
        #:gooov[p0:p1] += numpy.einsum('jkil,la->jkia', goooo, t1*2)
        for i in range(p0, p1):
            gooov[i] -= lib.dot(
                _cp(tau[i - p0].transpose(0, 2, 1).reshape(-1, nvir)),
                l1.T).reshape(nocc, nvir, nocc).transpose(0, 2, 1)
            gooov[i] -= lib.dot(
                _cp(l2[i].transpose(0, 2, 1).reshape(-1, nvir)),
                t1.T).reshape(nocc, nvir, nocc).transpose(0, 2, 1)
        lib.dot(goooo.reshape(-1, nocc), t1, 2, gooov[p0:p1].reshape(-1, nvir), 1)

        #:goovv -= numpy.einsum('jk,ikab->ijab', mij, tau)
        goovv = numpy.einsum('ia,jb->ijab', mia[p0:p1], t1)
        for i in range(p1 - p0):
            lib.dot(mij, tau[i].reshape(nocc, -1), -1, goovv[i].reshape(nocc, -1), 1)
            goovv[i] += .5 * l2[p0 + i]
            goovv[i] += .5 * tau[i]
        #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2[p0:p1])
        #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
        lib.dot(t2[p0:p1].reshape(-1, nvir), mab, -1, goovv.reshape(-1, nvir), 1)
        lib.dot(tau.reshape(-1, nvir), mvv.T, -.5, goovv.reshape(-1, nvir), 1)
        tau = None
        #==== mem usage nocc**3 + nocc*nvir**2

        pOvOv = _cp(mOvOv[p0:p1])
        pOVov = _cp(mOVov[p0:p1])
        #:gooov[p0:p1,:] += numpy.einsum('jaic,kc->jkia', pOvOv, t1)
        #:gooov[:,p0:p1] -= numpy.einsum('kaic,jc->jkia', pOVov, t1)
        tmp = lib.dot(pOvOv.reshape(-1, nvir), t1.T).reshape(p1 - p0, -1, nocc, nocc)
        gooov[p0:p1, :] += tmp.transpose(0, 3, 2, 1)
        lib.dot(t1, pOVov.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1), 0)
        gooov[:, p0:p1] -= tmp.reshape(nocc, p1 - p0, nvir, nocc).transpose(0, 1, 3, 2)

        #:tmp = numpy.einsum('ikac,jc->jika', l2, t1[p0:p1])
        #:gOvVo -= numpy.einsum('jika,kb->jabi', tmp, t1)
        #:gOvvO = numpy.einsum('jkia,kb->jabi', tmp, t1) + pOvOv.transpose(0,3,1,2)
        tmp = tmp.reshape(-1, nocc, nocc, nvir)
        lib.dot(t1[p0:p1], l2.reshape(-1, nvir).T, 1, tmp.reshape(p1 - p0, -1))
        gOvVo = numpy.einsum('ia,jb->jabi', l1, t1[p0:p1])
        gOvvO = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            gOvVo[i] -= lib.dot(
                _cp(tmp[i].transpose(0, 2, 1).reshape(-1, nocc)),
                t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
            gOvVo[i] += pOVov[i].transpose(2, 0, 1)
            gOvvO[i] = lib.dot(tmp[i].reshape(nocc, -1).T,
                               t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
            gOvvO[i] += pOvOv[i].transpose(2, 0, 1)
        tmp = None
        #==== mem usage nocc**3 + nocc*nvir**2*6
        # (the original comment read "nocc*nvir**6"; presumably a typo, the
        #  pass-2 `unit` estimate above uses nocc*nvir**2*6)
        dovvo[p0:p1] = (gOvVo * 2 + gOvvO).transpose(0, 2, 1, 3)
        gOvvO *= -2
        gOvvO -= gOvVo
        doovv[p0:p1] = gOvvO.transpose(0, 3, 1, 2)
        gOvvO = gOvVo = None

        for j0, j1 in prange(0, nocc, blksize):
            tau2 = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
            lib.dot(goooo[:, :, j0:j1].copy().reshape((p1 - p0) * nocc, -1),
                    tau2.reshape(-1, nvir**2), 1, goovv.reshape(-1, nvir**2), 1)
            tau2 += numpy.einsum('ia,jb->ijab', t1[j0:j1], t1)
            tau2 = _cp(tau2.transpose(0, 3, 1, 2).reshape(-1, nov))
            #:goovv[:,j0:j1] += numpy.einsum('ibld,jlda->ijab', pOvOv, tau2) * .5
            #:goovv[:,j0:j1] -= numpy.einsum('iald,jldb->ijab', pOVov, tau2) * .5
            goovv[:, j0:j1] += lib.dot(
                pOvOv.reshape(-1, nov), tau2.T,
                .5).reshape(p1 - p0, nvir, -1, nvir).transpose(0, 2, 3, 1)
            goovv[:, j0:j1] += lib.dot(
                pOVov.reshape(-1, nov), tau2.T,
                -.5).reshape(p1 - p0, nvir, -1, nvir).transpose(0, 2, 1, 3)
            tau2 = None
        #==== mem usage nocc**3 + nocc*nvir**2*7
        #:goovv += numpy.einsum('iald,jlbd->ijab', pOVov*2+pOvOv, t2) * .5
        pOVov *= 2
        pOVov += pOvOv
        for j in range(nocc):
            tmp = lib.dot(pOVov.reshape(-1, nov),
                          _cp(t2[j].transpose(0, 2, 1).reshape(-1, nvir)), .5)
            goovv[:, j] += tmp.reshape(-1, nvir, nvir)
            tmp = None
        dovov[p0:p1] = make_theta(goovv).transpose(0, 2, 1, 3)
        goooo = goovv = pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass2 [%d:%d]' % (p0, p1), *time1)

    h5fobj['dooov'][:] = gooov.transpose(0, 2, 1, 3) * 2 - gooov.transpose(
        1, 2, 0, 3)
    gooov = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nocc**2 * nvir * 2 + nocc * nvir**2 * 2,
               nvir**3 * 2 + nocc * nvir**2)
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit)))
    iobuflen = int(256e6 / 8 / blksize)
    # This pass is blocked over the virtual index, so report nvir (the
    # original passed nocc for the "nvir = %d" field).
    log.debug1(
        'rdm intermediates pass 3: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    h5fobj.create_group('dovvv')

    # ---- pass 3: dvvvv (packed) and dovvv, blocked over the virtual index ----
    for istep, (p0, p1) in enumerate(prange(0, nvir, blksize)):
        pvOvO = numpy.empty((p1 - p0, nocc, nvir, nocc))
        pvOVo = numpy.empty((p1 - p0, nocc, nvir, nocc))
        ao2mo.outcore._load_from_h5g(fswap['mvOvO'], p0, p1, pvOvO)
        ao2mo.outcore._load_from_h5g(fswap['mvOVo'], p0, p1, pvOVo)
        #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
        #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
        gvovv = lib.dot(pvOVo.reshape(-1, nocc), t1).reshape(-1, nocc, nvir, nvir)
        for i in range(p1 - p0):
            gvovv[i] = gvovv[i].transpose(0, 2, 1)
        lib.dot(pvOvO.reshape(-1, nocc), t1, -1, gvovv.reshape(-1, nvir), 1)
        pvOvO = pvOVo = None
        #==== mem usage nocc**2*nvir*2 + nocc*nvir**2*2

        l2tmp = l2[:, :, p0:p1] * .5
        #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2tmp, t2)
        #:jabc = numpy.einsum('ijab,ic->jabc', l2tmp, t1)
        #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
        gvvvv = lib.dot(l2tmp.reshape(nocc**2, -1).T, t2.reshape(nocc**2, -1))
        jabc = lib.dot(l2tmp.reshape(nocc, -1).T, t1)
        lib.dot(jabc.reshape(nocc, -1).T, t1, 1, gvvvv.reshape(-1, nvir), 1)
        gvvvv = gvvvv.reshape(-1, nvir, nvir, nvir)
        l2tmp = jabc = None

        #:gvovv = numpy.einsum('ja,jibc->aibc', l1[:,p0:p1], t2)
        #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1[:,p0:p1])
        lib.dot(l1[:, p0:p1].copy().T, t2.reshape(nocc, -1), 1,
                gvovv.reshape(p1 - p0, -1), 1)
        lib.dot(t1[:, p0:p1].copy().T, l2.reshape(nocc, -1), 1,
                gvovv.reshape(p1 - p0, -1), 1)
        tmp = numpy.einsum('ja,jb->ab', l1[:, p0:p1], t1)
        gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
        gvovv += numpy.einsum('ba,ic->aibc', mvv[:, p0:p1] * .5, t1)
        #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
        for j in range(p1 - p0):
            lib.dot(t1, gvvvv[j].reshape(nvir, -1), -2,
                    gvovv[j].reshape(nocc, -1), 1)

        # symmetrize dvvvv because it is symmetrized in ccsd_grad and make_rdm2 anyway
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(dvvvv+dvvvv.transpose(1,0,3,2))
        # now dvvvv == dvvvv.transpose(2,3,0,1) == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
        tmp = numpy.empty((nvir, nvir, nvir))
        tmp1 = numpy.empty((nvir, nvir, nvir))
        tmpvvvv = numpy.empty((p1 - p0, nvir, nvir_pair))
        for i in range(p1 - p0):
            make_theta(gvvvv[i:i + 1], out=tmp)
            tmp1[:] = tmp.transpose(1, 0, 2)
            _ccsd.precontract(tmp1, diag_fac=2, out=tmpvvvv[i])
        # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
        for i in range(p0, p1):
            for j in range(p0, i):
                tmpvvvv[i - p0, j] += tmpvvvv[j - p0, i]
            tmpvvvv[i - p0, i] *= 2
        for i in range(p0, p1):
            off = i * (i + 1) // 2
            if p0 > 0:
                # Add what earlier blocks already stored for these rows.
                tmpvvvv[i - p0, :p0] += dvvvv[off:off + p0]
            dvvvv[off:off + i + 1] = tmpvvvv[i - p0, :i + 1] * .25
        for i in range(p1, nvir):
            # Stash this block's part of later rows; later blocks add to it.
            off = i * (i + 1) // 2
            dvvvv[off + p0:off + p1] = tmpvvvv[:, i]
        tmp = tmp1 = tmpvvvv = None
        #==== mem usage nvir**3 + nocc*nvir**2

        gvvov = make_theta(gvovv).transpose(0, 2, 1, 3)
        ao2mo.outcore._transpose_to_h5g(h5fobj, 'dovvv/%d' % istep,
                                        gvvov.reshape(-1, nov), iobuflen)
        gvvvv = None
        gvovv = None
        time1 = log.timer_debug1('rdm intermediates pass3 [%d:%d]' % (p0, p1), *time1)

    del (fswap['mOvOv'])
    del (fswap['mOVov'])
    del (fswap['mvOvO'])
    del (fswap['mvOVo'])
    fswap.close()
    _tmpfile = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'], h5fobj['doovv'],
            h5fobj['dovvo'], None, h5fobj['dovvv'], h5fobj['dooov'])
def make_intermediates(mycc, t1, t2, eris):
    """Assemble the intermediates used by the lambda update, in core.

    All contractions are carried out on full in-memory arrays.  The two-index
    results ``v1``, ``v2``, ``w3`` and ``v4`` are attached to the returned
    object as arrays; the four-index results ``woooo``, ``wovvo``, ``woVVo``,
    ``woovo`` and ``wvvvo`` are stored as datasets of a ``lib.H5TmpFile``
    kept on the object as ``ftmp``.
    """
    nocc, nvir = t1.shape

    # Fock-matrix blocks
    fock_oo = eris.fock[:nocc,:nocc]
    fock_ov = eris.fock[:nocc,nocc:]
    fock_vo = eris.fock[nocc:,:nocc]
    fock_vv = eris.fock[nocc:,nocc:]

    tau = _ccsd.make_tau(t2, t1, t1)

    eri_ovov = np.asarray(eris.ovov)
    eri_ovoo = np.asarray(eris.ovoo)
    # "2 * direct - exchange" combinations
    ovov_as = eri_ovov * 2 - eri_ovov.transpose(0,3,2,1)
    ovoo_as = eri_ovoo * 2 - eri_ovoo.transpose(2,1,0,3)

    # ---- two-index intermediates ----
    v1 = fock_vv - lib.einsum('ja,jb->ba', fock_ov, t1)
    v1 -= lib.einsum('jakc,jkbc->ba', ovov_as, tau)

    v2 = fock_oo + lib.einsum('ib,jb->ij', fock_ov, t1)
    v2 += lib.einsum('ibkc,jkbc->ij', ovov_as, tau)
    v2 += np.einsum('kbij,kb->ij', ovoo_as, t1)

    v4 = fock_ov + np.einsum('jbkc,kc->jb', ovov_as, t1)

    v5 = np.einsum('kc,jkbc->bj', fock_ov, t2) * 2
    v5 -= np.einsum('kc,jkcb->bj', fock_ov, t2)
    v5 += fock_vo
    v5 += lib.einsum('kc,kb,jc->bj', v4, t1, t1)
    v5 -= lib.einsum('lckj,klbc->bj', ovoo_as, t2)

    # ---- woooo ----
    eri_oooo = np.asarray(eris.oooo)
    woooo = lib.einsum('icjl,kc->ikjl', eri_ovoo, t1)
    woooo += lib.einsum('jcil,kc->iljk', eri_ovoo, t1)
    woooo += eri_oooo.copy()
    woooo += lib.einsum('icjd,klcd->ikjl', eri_ovov, tau)

    theta = t2*2 - t2.transpose(0,1,3,2)

    # ---- ovvo-type building blocks ----
    vOVvo = lib.einsum('ldjb,klcd->jbck', ovov_as, t2)
    vOVvo -= lib.einsum('ldjb,kldc->jbck', eri_ovov, t2)
    vOVvo += np.asarray(eris.ovvo)

    voVVo = lib.einsum('jdlb,kldc->jbck', eri_ovov, t2)
    voVVo -= np.asarray(eris.oovv).transpose(0,3,2,1)

    vovvo_sum = vOVvo*2 + voVVo
    w3 = np.einsum('jbck,jb->ck', vovvo_sum, t1)

    # ---- woovo ----
    woovo = lib.einsum('ibck,jb->ijck', vovvo_sum, t1)
    woovo = woovo - woovo.transpose(0,3,2,1)
    woovo += lib.einsum('ibck,jb->ikcj', vOVvo-voVVo, t1)
    woovo += ovoo_as.conj().transpose(3,2,1,0)
    woovo += lib.einsum('lcik,jlbc->ikbj', ovoo_as, theta)
    woovo -= lib.einsum('lcik,jlbc->ijbk', ovoo_as, t2)
    woovo -= lib.einsum('iclk,ljbc->ijbk', ovoo_as, t2)

    # ---- wvvvo (first part) ----
    wvvvo = lib.einsum('jack,jb->back', vovvo_sum, t1)
    wvvvo = wvvvo - wvvvo.transpose(2,1,0,3)
    wvvvo += lib.einsum('jack,jb->cabk', vOVvo-voVVo, t1)
    wvvvo -= lib.einsum('lajk,jlbc->cabk', ovoo_as, tau)

    # wOVvo / woVVo share storage with vOVvo / voVVo and are completed
    # in place from here on.
    wOVvo = vOVvo
    woVVo = voVVo
    wOVvo -= np.einsum('jbld,kd,lc->jbck', eri_ovov, t1, t1)
    woVVo += np.einsum('jdlb,kd,lc->jbck', eri_ovov, t1, t1)
    wOVvo -= lib.einsum('jblk,lc->jbck', eri_ovoo, t1)
    woVVo += lib.einsum('lbjk,lc->jbck', eri_ovoo, t1)
    vovvo_sum = vOVvo = voVVo = None

    # ---- ovvv contributions ----
    eri_ovvv = np.asarray(eris.get_ovvv())
    wvvvo += lib.einsum('kacd,kjbd->bacj', eri_ovvv, t2) * 1.5
    wOVvo += lib.einsum('jbcd,kd->jbck', eri_ovvv, t1)
    woVVo -= lib.einsum('jdcb,kd->jbck', eri_ovvv, t1)

    # From here on eri_ovvv holds 2*ovvv - ovvv.transpose(0,3,2,1)
    eri_ovvv = eri_ovvv*2 - eri_ovvv.transpose(0,3,2,1)
    v1 += np.einsum('jcba,jc->ba', eri_ovvv, t1)
    v5 += lib.einsum('kdbc,jkcd->bj', eri_ovvv, t2)
    woovo += lib.einsum('idcb,jkdb->ijck', eri_ovvv, tau)

    ovvv_theta = lib.einsum('kdca,jkbd->cabj', eri_ovvv, theta)
    wvvvo -= ovvv_theta
    wvvvo += ovvv_theta.transpose(2,1,0,3) * .5
    wvvvo -= eri_ovvv.conj().transpose(3,2,1,0)
    eri_ovvv = ovvv_theta = None

    w3 += v5
    w3 += np.einsum('cb,jb->cj', v1, t1)
    w3 -= np.einsum('jk,jb->bk', v2, t1)

    # ---- package the results ----
    class _IMDS:
        pass

    imds = _IMDS()
    imds.ftmp = lib.H5TmpFile()
    dtype = np.result_type(t2, eris.vvvv).char
    dataset_shapes = (
        ('woooo', (nocc,nocc,nocc,nocc)),
        ('wovvo', (nocc,nvir,nvir,nocc)),
        ('woVVo', (nocc,nvir,nvir,nocc)),
        ('woovo', (nocc,nocc,nvir,nocc)),
        ('wvvvo', (nvir,nvir,nvir,nocc)),
    )
    for key, shape in dataset_shapes:
        setattr(imds, key, imds.ftmp.create_dataset(key, shape, dtype))

    imds.woooo[:] = woooo
    imds.wovvo[:] = wOVvo*2 + woVVo
    imds.woVVo[:] = woVVo
    imds.woovo[:] = woovo
    imds.wvvvo[:] = wvvvo
    imds.v1 = v1
    imds.v2 = v2
    imds.w3 = w3
    imds.v4 = v4
    imds.ftmp.flush()
    return imds
def gamma2_incore(mycc, t1, t2, l1, l2):
    """Build the eight arrays returned below from t1, t2, l1 and l2, in memory.

    Presumably these are the blocks of the CCSD two-particle density matrix
    (function name ``gamma2``) -- TODO confirm against the callers.

    Args:
        mycc: kept for interface compatibility; not read by this function.
        t1: 2-D array; ``t1.shape`` defines ``(nocc, nvir)``.
        t2: 4-D array, indexed as [occ, occ, vir, vir] by the contractions.
        l1: 2-D array contracted alongside ``t1``.
        l2: 4-D array contracted alongside ``t2``.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        ``dvvvv`` has shape ``(nvir_pair, nvir_pair)`` with
        ``nvir_pair = nvir*(nvir+1)//2``; ``dovvv`` is
        ``(nocc, nvir, nvir, nvir)`` and ``dvvov`` is a transposed view of it.

    Notes:
        The original body started with ``time1 = time.clock(), time.time()``.
        ``time.clock`` was removed in Python 3.8, so the call raised
        AttributeError, and ``time1`` was never read. It has been removed
        together with other dead locals (``log``, an initial
        ``gooov = numpy.zeros(...)`` that was rebound before any read, and
        ``diag_idx``). The numerical statements are unchanged.
    """
    nocc, nvir = t1.shape
    nov = nocc * nvir

    #:theta = make_theta(t2)
    #:mOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2)
    #:mOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2)
    #:mOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
    # l2a / t2a hold axis-permuted copies so the contractions above can be
    # evaluated as plain matrix products with lib.dot.
    l2a = numpy.empty((nocc, nvir, nocc, nvir))
    t2a = numpy.empty((nocc, nvir, nocc, nvir))
    for i in range(nocc):
        l2a[i] = l2[i].transpose(2, 0, 1)
        t2a[i] = t2[i].transpose(2, 0, 1)
    mOvOv = lib.dot(t2a.reshape(-1, nov), l2a.reshape(-1, nov).T).reshape(nocc, nvir, nocc, nvir)
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1, 0, 2)
    mOVov = lib.dot(t2a.reshape(-1, nov), l2a.reshape(-1, nov).T, -1).reshape(nocc, nvir, nocc, nvir)
    # Reuse the t2a buffer for theta (theta and t2a are the same array).
    theta = t2a
    for i in range(nocc):
        l2a[i] = l2[i].transpose(1, 0, 2)
        theta[i] *= 2
        theta[i] -= t2[i].transpose(2, 0, 1)
    lib.dot(theta.reshape(-1, nov), l2a.reshape(nov, -1).T, 1, mOVov.reshape(nov, -1), 1)
    theta = l2a = t2a = None

    moo = (numpy.einsum('jdld->jl', mOvOv) * 2 + numpy.einsum('jdld->jl', mOVov))
    mvv = (numpy.einsum('lbld->bd', mOvOv) * 2 + numpy.einsum('lbld->bd', mOVov))
    mia = (numpy.einsum('kc,ikac->ia', l1, t2) * 2 - numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo * .5

    tau = _ccsd.make_tau(t2, t1, t1)
    #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
    goooo = lib.dot(tau.reshape(-1, nvir**2), l2.reshape(-1, nvir**2).T, .5)
    goooo = goooo.reshape(-1, nocc, nocc, nocc)
    doooo = _cp(make_theta(goooo).transpose(0, 2, 1, 3))

    #:gooov -= numpy.einsum('ib,kjab->jkia', l1, tau)
    #:gooov -= numpy.einsum('kjab,ib->jkia', l2, t1)
    #:gooov += numpy.einsum('jkil,la->jkia', goooo, t1*2)
    gooov = lib.dot(_cp(tau.reshape(-1, nvir)), l1.T, -1)
    lib.dot(_cp(l2.reshape(-1, nvir)), t1.T, -1, gooov, 1)
    gooov = gooov.reshape(nocc, nocc, nvir, nocc)
    tmp = numpy.einsum('ji,ka->jkia', moo * -.5, t1)
    tmp += gooov.transpose(1, 0, 3, 2)
    gooov, tmp = tmp, None
    lib.dot(goooo.reshape(-1, nocc), t1, 2, gooov.reshape(-1, nvir), 1)

    goovv = numpy.einsum('ia,jb->ijab', mia, t1)
    for i in range(nocc):
        goovv[i] += .5 * l2[i]
        goovv[i] += .5 * tau[i]
    #:goovv -= numpy.einsum('jk,kiba->jiba', mij, tau)
    lib.dot(mij, tau.reshape(nocc, -1), -1, goovv.reshape(nocc, -1), 1)
    #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2)
    #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
    lib.dot(t2.reshape(-1, nvir), mab, -1, goovv.reshape(-1, nvir), 1)
    lib.dot(tau.reshape(-1, nvir), mvv.T, -.5, goovv.reshape(-1, nvir), 1)
    tau = None

    #:gooov += numpy.einsum('jaic,kc->jkia', mOvOv, t1)
    #:gooov -= numpy.einsum('kaic,jc->jkia', mOVov, t1)
    tmp = lib.dot(mOvOv.reshape(-1, nvir), t1.T).reshape(nocc, -1, nocc, nocc)
    gooov += tmp.transpose(0, 3, 2, 1)
    lib.dot(t1, mOVov.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1), 0)
    gooov -= tmp.reshape(nocc, nocc, nvir, nocc).transpose(0, 1, 3, 2)
    dooov = gooov.transpose(0, 2, 1, 3) * 2 - gooov.transpose(1, 2, 0, 3)
    gooov = None

    #:tmp = numpy.einsum('ikac,jc->jaik', l2, t1)
    #:gOvVo -= numpy.einsum('jaik,kb->jabi', tmp, t1)
    #:gOvvO = numpy.einsum('jaki,kb->jabi', tmp, t1) + mOvOv.transpose(0,3,1,2)
    # The tmp buffer from the previous step is reused as lib.dot output here.
    tmp = tmp.reshape(nocc, nocc, nocc, nvir)
    lib.dot(t1, l2.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1))
    gOvVo = numpy.einsum('ia,jb->jabi', l1, t1)
    gOvvO = numpy.empty((nocc, nvir, nvir, nocc))
    for i in range(nocc):
        gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0, 2, 1).reshape(-1, nocc)), t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
        gOvVo[i] += mOVov[i].transpose(2, 0, 1)
        gOvvO[i] = lib.dot(tmp[i].reshape(nocc, -1).T, t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
        gOvvO[i] += mOvOv[i].transpose(2, 0, 1)
    tmp = None

    dovvo = numpy.empty((nocc, nvir, nvir, nocc))
    doovv = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        tmp = gOvVo[i] * 2 + gOvvO[i]
        dovvo[i] = tmp.transpose(1, 0, 2)
        tmp = gOvvO[i] * -2 - gOvVo[i]
        doovv[i] = tmp.transpose(2, 0, 1)
    gOvvO = gOvVo = None

    tau2 = _ccsd.make_tau(t2, t1, t1)
    #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
    lib.dot(goooo.reshape(nocc * nocc, -1), tau2.reshape(-1, nvir**2), 1, goovv.reshape(-1, nvir**2), 1)
    tau2 += numpy.einsum('ia,jb->ijab', t1, t1)
    # tau2p is a reshaped view of tau2, so this loop permutes each tau2[i]
    # block within its own storage.
    tau2p = tau2.reshape(nocc, nvir, nocc, nvir)
    for i in range(nocc):
        tau2p[i] = tau2[i].transpose(2, 0, 1)
    tau2, tau2p = tau2p.reshape(nov, -1), None
    #:goovv += numpy.einsum('ibld,jlda->ijab', mOvOv, tau2) * .5
    #:goovv -= numpy.einsum('iald,jldb->ijab', mOVov, tau2) * .5
    tmp = lib.dot(mOvOv.reshape(-1, nov), tau2.T, .5).reshape(nocc, nvir, -1, nvir)
    for i in range(nocc):
        tmp[i] = goovv[i].transpose(1, 0, 2) + tmp[i].transpose(2, 1, 0)
    goovv, tmp = tmp, None
    lib.dot(mOVov.reshape(-1, nov), tau2.T, -.5, goovv.reshape(nov, -1), 1)

    #:goovv += numpy.einsum('iald,jlbd->ijab', mOVov*2+mOvOv, t2) * .5
    t2a, tau2 = tau2.reshape(nocc, nvir, nocc, nvir), None
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1, 0, 2)
    tmp = mOVov * 2
    tmp += mOvOv
    lib.dot(tmp.reshape(-1, nov), t2a.reshape(nov, -1), .5, goovv.reshape(nov, -1), 1)
    t2a = tmp = None
    for i in range(nocc):
        goovv[i] = goovv[i] * 2 - goovv[i].transpose(2, 1, 0)
    dovov = goovv
    goooo = goovv = None

    #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
    mOVov = lib.transpose(mOVov.reshape(nov, -1))
    gvovv = lib.dot(mOVov.reshape(nocc, -1).T, t1).reshape(nvir, nocc, nvir, nvir)
    mOVov = None
    tmp = numpy.einsum('ja,jb->ab', l1, t1)
    #:gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
    #:gvovv += numpy.einsum('ba,ic->aibc', mvv, t1*.5)
    for i in range(nvir):
        gvovv[i] += numpy.einsum('b,ic->icb', tmp[i], t1)
        gvovv[i] += numpy.einsum('b,ic->icb', mvv[:, i] * .5, t1)
        gvovv[i] = gvovv[i].transpose(0, 2, 1)

    #:gvovv += numpy.einsum('ja,jibc->aibc', l1, t2)
    #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1)
    #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
    mOvOv = lib.transpose(mOvOv.reshape(nov, -1))
    lib.dot(mOvOv.reshape(nocc, -1).T, t1, -1, gvovv.reshape(-1, nvir), 1)
    mOvOv = None
    lib.dot(l1.T, t2.reshape(nocc, -1), 1, gvovv.reshape(nvir, -1), 1)
    lib.dot(t1.T, l2.reshape(nocc, -1), 1, gvovv.reshape(nvir, -1), 1)

    tmp = numpy.empty((nocc, nvir, nvir))
    for i in range(nvir):
        #:gvovv*2 - gvovv.transpose(0,1,3,2)
        gvovv[i] = _ccsd.make_021(gvovv[i], gvovv[i], 2, -1, out=tmp)

    #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2, t2)*.5
    #:jabc = numpy.einsum('ijab,ic->jabc', l2, t1) * .5
    #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
    #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
    tau = _ccsd.make_tau(t2, t1, t1)
    theta = make_theta(tau)
    tau = None
    l2tmp = _ccsd.pack_tril(l2.reshape(-1, nvir, nvir))
    gtmp = lib.dot(l2tmp.T, theta.reshape(nocc**2, -1), .5).reshape(-1, nvir, nvir)
    l2tmp = theta = None
    nvir_pair = nvir * (nvir + 1) // 2
    tmp = numpy.empty((nvir, nvir, nvir))
    tmp1 = numpy.empty((nvir, nvir, nvir))
    tmptril = numpy.empty((nvir, nvir_pair))
    dvvvv = numpy.empty((nvir_pair, nvir_pair))
    dovvv = numpy.empty((nocc, nvir, nvir, nvir))
    # dvvov = (gvovv*2 - gvovv.transpose(0,1,3,2)).transpose(0,2,1,3)
    # dovvv = dvvov.transpose(2,3,0,1)
    # p0 tracks i*(i+1)//2, the first packed row for the current i.
    p0 = 0
    for i in range(nvir):
        tmp[:i + 1] = gtmp[p0:p0 + i + 1]
        for j in range(i + 1, nvir):
            tmp[j] = gtmp[j * (j + 1) // 2 + i].T
        lib.dot(t1, tmp.reshape(nvir, -1), -2, gvovv[i].reshape(nocc, -1), 1)
        dovvv[:, :, i] = gvovv[i].transpose(0, 2, 1)

        #:gvvvv[i] = (tmp*2-tmp.transpose(0,2,1)).transpose(1,0,2)
        #:gvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(1,0,3,2))
        tmp1[:] = tmp.transpose(1, 0, 2)
        _ccsd.precontract(tmp1, diag_fac=2, out=tmptril)
        # Rows p0:p0+i were already written by the trailing j-loop of earlier
        # iterations, so this += accumulates onto initialized data even though
        # dvvvv was allocated with numpy.empty.
        dvvvv[p0:p0 + i] += tmptril[:i]
        dvvvv[p0:p0 + i] *= .25
        dvvvv[i * (i + 1) // 2 + i] = tmptril[i] * .5
        for j in range(i + 1, nvir):
            dvvvv[j * (j + 1) // 2 + i] = tmptril[j]
        p0 += i + 1
    gtmp = tmp = tmp1 = tmptril = gvovv = None
    dvvov = dovvv.transpose(2, 3, 0, 1)
    return (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)
def update_lambda(mycc, t1, t2, l1, l2, eris=None, imds=None):
    """Return updated ``(l1new, l2new)`` computed from t1, t2, l1 and l2.

    Args:
        mycc: object providing ``stdout``, ``verbose``, ``level_shift``,
            ``max_memory`` and ``_add_vvvv`` (all read or called below).
        t1, t2: arrays; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: arrays that are updated (presumably shaped like t1 / t2 --
            TODO confirm).
        eris: integral container. ``fock``, ``mo_energy``, ``ovoo``,
            ``oovv``, ``ovvo`` and ``get_ovvv`` are used. Although the
            default is None, ``eris`` is dereferenced unconditionally.
        imds: intermediates; built with ``make_intermediates`` when None.
            Attributes ``w1``-``w4``, ``wvooo``, ``wvvov``, ``wvOOv``,
            ``wVOov`` and ``woooo`` are read.

    Returns:
        Tuple ``(l1new, l2new)``.
    """
    # NOTE(review): a second ``update_lambda`` is defined later in this file
    # and rebinds the name at import time; also the ``make_intermediates``
    # visible next to this function stores v1/v2/v4 rather than the
    # w1/w2/w4/wvooo/... attributes read here -- confirm which pairing is
    # intended.
    if imds is None: imds = make_intermediates(mycc, t1, t2, eris)
    time0 = logger.process_clock(), logger.perf_counter()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    fov = eris.fock[:nocc, nocc:]
    mo_e_o = eris.mo_energy[:nocc]
    # The level shift is added to the second (index >= nocc) energy block only.
    mo_e_v = eris.mo_energy[nocc:] + mycc.level_shift

    # theta = 2*t2 minus t2 with its last two axes swapped.
    theta = t2 * 2 - t2.transpose(0, 1, 3, 2)
    mba = lib.einsum('klca,klcb->ba', l2, theta)
    mij = lib.einsum('ikcd,jkcd->ij', l2, theta)
    theta = None
    mba1 = numpy.einsum('jc,jb->bc', l1, t1) + mba
    mij1 = numpy.einsum('kb,jb->kj', l1, t1) + mij
    mia1 = t1 + numpy.einsum('kc,jkbc->jb', l1, t2) * 2
    mia1 -= numpy.einsum('kc,jkcb->jb', l1, t2)
    mia1 -= reduce(numpy.dot, (t1, l1.T, t1))
    mia1 -= numpy.einsum('bd,jd->jb', mba, t1)
    mia1 -= numpy.einsum('lj,lb->jb', mij, t1)

    l2new = mycc._add_vvvv(None, l2, eris, with_ovvv=False, t2sym='jiba')
    l1new = numpy.einsum('ijab,jb->ia', l2new, t1) * 2
    l1new -= numpy.einsum('jiab,jb->ia', l2new, t1)
    l2new *= .5  # *.5 because of l2+l2.transpose(1,0,3,2) in the end
    # tmp has not been assigned before this point; this only binds the name.
    tmp = None

    # Subtract the (shifted) orbital energies from the diagonals of w1 / w2.
    w1 = imds.w1 - numpy.diag(mo_e_v)
    w2 = imds.w2 - numpy.diag(mo_e_o)

    l1new += fov
    l1new += numpy.einsum('ib,ba->ia', l1, w1)
    l1new -= numpy.einsum('ja,ij->ia', l1, w2)
    l1new -= numpy.einsum('ik,ka->ia', mij, imds.w4)
    l1new -= numpy.einsum('ca,ic->ia', mba, imds.w4)
    l1new += numpy.einsum('ijab,bj->ia', l2, imds.w3) * 2
    l1new -= numpy.einsum('ijba,bj->ia', l2, imds.w3)

    l2new += numpy.einsum('ia,jb->ijab', l1, imds.w4)
    l2new += lib.einsum('jibc,ca->jiba', l2, w1)
    l2new -= lib.einsum('jk,kiba->jiba', w2, l2)

    eris_ovoo = _cp(eris.ovoo)
    l1new -= numpy.einsum('iajk,kj->ia', eris_ovoo, mij1) * 2
    l1new += numpy.einsum('jaik,kj->ia', eris_ovoo, mij1)
    l2new -= lib.einsum('jbki,ka->jiba', eris_ovoo, l1)
    eris_ovoo = None

    tau = _ccsd.make_tau(t2, t1, t1)
    l2tau = lib.einsum('ijcd,klcd->ijkl', l2, tau)
    tau = None
    l2t1 = lib.einsum('jidc,kc->ijkd', l2, t1)

    # Choose a block size from the memory still available.
    # NOTE(review): blksize is capped by nocc and the debug message talks
    # about nocc, but the loop below chunks the range [0, nvir) -- confirm.
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    unit = nocc * nvir**2 * 5
    blksize = min(nocc, max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit)))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks', blksize, nocc, int((nocc + blksize - 1) / blksize))

    l1new -= numpy.einsum('jb,jiab->ia', l1, _cp(eris.oovv))
    # Process one slice [p0, p1) at a time; each pass loads the matching
    # slices of the integrals and intermediates and accumulates into
    # l1new / l2new.
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_ovvv = eris.get_ovvv(slice(None), slice(p0, p1))
        l1new[:, p0:p1] += numpy.einsum('iabc,bc->ia', eris_ovvv, mba1) * 2
        l1new -= numpy.einsum('ibca,bc->ia', eris_ovvv, mba1[p0:p1])
        l2new[:, :, p0:p1] += lib.einsum('jbac,ic->jiba', eris_ovvv, l1)
        m4 = lib.einsum('ijkd,kadb->ijab', l2t1, eris_ovvv)
        l2new[:, :, p0:p1] -= m4
        l1new[:, p0:p1] -= numpy.einsum('ijab,jb->ia', m4, t1) * 2
        l1new -= numpy.einsum('ijab,ia->jb', m4, t1[:, p0:p1]) * 2
        l1new[:, p0:p1] += numpy.einsum('jiab,jb->ia', m4, t1)
        l1new += numpy.einsum('jiab,ia->jb', m4, t1[:, p0:p1])
        eris_ovvv = m4 = None

        eris_voov = _cp(eris.ovvo[:, p0:p1].transpose(1, 0, 3, 2))
        l1new[:, p0:p1] += numpy.einsum('jb,aijb->ia', l1, eris_voov) * 2
        l2new[:, :, p0:p1] += eris_voov.transpose(1, 2, 0, 3) * .5
        l2new[:, :, p0:p1] -= lib.einsum('bjic,ca->jiba', eris_voov, mba1)
        l2new[:, :, p0:p1] -= lib.einsum('bjka,ik->jiba', eris_voov, mij1)
        l1new[:, p0:p1] += numpy.einsum('aijb,jb->ia', eris_voov, mia1) * 2
        l1new -= numpy.einsum('bija,jb->ia', eris_voov, mia1[:, p0:p1])
        m4 = lib.einsum('ijkl,aklb->ijab', l2tau, eris_voov)
        l2new[:, :, p0:p1] += m4 * .5
        l1new[:, p0:p1] += numpy.einsum('ijab,jb->ia', m4, t1) * 2
        l1new -= numpy.einsum('ijba,jb->ia', m4, t1[:, p0:p1])

        saved_wvooo = _cp(imds.wvooo[p0:p1])
        l1new -= lib.einsum('ckij,jkca->ia', saved_wvooo, l2[:, :, p0:p1])
        saved_wvovv = _cp(imds.wvvov[p0:p1])
        # Watch out memory usage here, due to the l2 transpose
        l1new[:, p0:p1] += lib.einsum('abkc,kibc->ia', saved_wvovv, l2)
        saved_wvooo = saved_wvovv = None

        saved_wvOOv = _cp(imds.wvOOv[p0:p1])
        tmp_voov = _cp(imds.wVOov[p0:p1]) * 2
        tmp_voov += saved_wvOOv
        tmp = l2.transpose(0, 2, 1, 3) - l2.transpose(0, 3, 1, 2) * .5
        l2new[:, :, p0:p1] += lib.einsum('iakc,bjkc->jiba', tmp, tmp_voov)
        tmp = None

        tmp = lib.einsum('jkca,bikc->jiba', l2, saved_wvOOv)
        l2new[:, :, p0:p1] += tmp
        l2new[:, :, p0:p1] += tmp.transpose(1, 0, 2, 3) * .5
        saved_wvOOv = tmp = None

    # Unsliced contribution from woooo, added once after the blocked loop.
    saved_woooo = _cp(imds.woooo)
    m3 = lib.einsum('ijkl,klab->ijab', saved_woooo, l2)
    l2new += m3 * .5
    l1new += numpy.einsum('ijab,jb->ia', m3, t1) * 2
    l1new -= numpy.einsum('ijba,jb->ia', m3, t1)
    saved_woooo = m3 = None
    #time1 = log.timer_debug1('lambda pass [%d:%d]'%(p0, p1), *time1)

    # Divide by the energy differences e_i - e_a.
    eia = lib.direct_sum('i-a->ia', mo_e_o, mo_e_v)
    l1new /= eia

    # l2new = l2new + l2new.transpose(1,0,3,2)
    # l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    # l2new += l2
    # In-place version of the symmetrization and division sketched in the
    # first two commented lines above, done per (i, j<i) pair and then for
    # the i == j blocks. No ``l2new += l2`` step is executed here.
    for i in range(nocc):
        if i > 0:
            l2new[i, :i] += l2new[:i, i].transpose(0, 2, 1)
            l2new[i, :i] /= lib.direct_sum('a,jb->jab', eia[i], eia[:i])
            l2new[:i, i] = l2new[i, :i].transpose(0, 2, 1)
        l2new[i, i] = l2new[i, i] + l2new[i, i].T
        l2new[i, i] /= lib.direct_sum('a,b->ab', eia[i], eia[i])

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new
def update_lambda(mycc, t1, t2, l1, l2, eris, imds):
    """Return updated ``(l1new, l2new)`` computed from t1, t2, l1 and l2.

    The accumulated terms are divided by differences of Fock-matrix
    diagonal elements and the input ``l1`` / ``l2`` are added back at the end.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``_add_vvvv``.
        t1, t2: arrays; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: current values (presumably shaped like t1 / t2 -- TODO confirm).
        eris: integral container; ``fock``, ``ovov``, ``ovoo``, ``ovvo``,
            ``oovv`` and ``get_ovvv()`` are used.
        imds: intermediates; ``woooo``, ``wovvo``, ``woVVo``, ``woovo``,
            ``wvvvo``, ``v1``, ``v2``, ``w3`` and ``v4`` are read.

    Returns:
        Tuple ``(l1new, l2new)``.
    """
    # time.clock() was removed in Python 3.8. Use the logger's own clock
    # helpers, as the other update_lambda in this file does, so the start
    # stamps match whatever log.timer_debug1 measures against.
    time0 = logger.process_clock(), logger.perf_counter()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    l1new = np.zeros_like(l1)

    # Blocks of eris.fock, split at row/column index nocc.
    foo = eris.fock[:nocc,:nocc]
    fov = eris.fock[:nocc,nocc:]
    fvv = eris.fock[nocc:,nocc:]

    tau = _ccsd.make_tau(t2, t1, t1)

    # theta = 2*t2 minus t2 with its last two axes swapped.
    theta = t2*2 - t2.transpose(0,1,3,2)
    mvv = lib.einsum('klca,klcb->ba', l2, theta)
    moo = lib.einsum('kicd,kjcd->ij', l2, theta)
    mvv1 = lib.einsum('jc,jb->bc', l1, t1) + mvv
    moo1 = lib.einsum('ic,kc->ik', l1, t1) + moo

    # m3 = einsum('ijab,acbd->ijcd', l2, vvvv)
    #    = einsum('ijab,cadb->ijcd', l2.conj(), vvvv).conj()
    m3 = mycc._add_vvvv(None, l2.conj(), eris, with_ovvv=False).conj()
    m3 += lib.einsum('klab,ikjl->ijab', l2, imds.woooo)
    m3 *= .5

    ovov = np.asarray(eris.ovov)
    l2tau = np.einsum('ijcd,klcd->ijkl', l2, tau)
    m3 += np.einsum('kalb,ijkl->ijab', ovov, l2tau) * .5
    l2tau = None

    l2new = ovov.transpose(0,2,1,3) * .5
    l2new += lib.einsum('ijac,cb->ijab', l2, imds.v1)
    l2new -= lib.einsum('ikab,jk->ijab', l2, imds.v2)
    l2new -= lib.einsum('ca,icjb->ijab', mvv1, ovov)
    l2new -= lib.einsum('ik,kajb->ijab', moo1, ovov)

    # From here on ovov holds 2*ovov minus its (0,3,2,1) transpose.
    ovov = ovov * 2 - ovov.transpose(0,3,2,1)
    l1new -= np.einsum('ik,ka->ia', moo, imds.v4)
    l1new -= np.einsum('ca,ic->ia', mvv, imds.v4)
    l2new += np.einsum('ia,jb->ijab', l1, imds.v4)

    tmp = t1 + np.einsum('kc,kjcb->jb', l1, theta)
    tmp -= lib.einsum('bd,jd->jb', mvv1, t1)
    tmp -= lib.einsum('lj,lb->jb', moo, t1)
    l1new += np.einsum('jbia,jb->ia', ovov, tmp)
    ovov = tmp = None

    ovvv = np.asarray(eris.get_ovvv())
    l1new += np.einsum('iacb,bc->ia', ovvv, mvv1) * 2
    l1new -= np.einsum('ibca,bc->ia', ovvv, mvv1)
    l2new += lib.einsum('ic,jbca->jiba', l1, ovvv)
    l2t1 = np.einsum('ijcd,kd->ijck', l2, t1)
    m3 -= np.einsum('kbca,ijck->ijab', ovvv, l2t1)
    l2t1 = ovvv = None

    # m3 is complete here; add it to l2new and contract it with t1 for l1new.
    l2new += m3
    l1new += np.einsum('ijab,jb->ia', m3, t1) * 2
    l1new += np.einsum('jiba,jb->ia', m3, t1) * 2
    l1new -= np.einsum('ijba,jb->ia', m3, t1)
    l1new -= np.einsum('jiab,jb->ia', m3, t1)

    ovoo = np.asarray(eris.ovoo)
    l1new -= np.einsum('iajk,kj->ia', ovoo, moo1) * 2
    l1new += np.einsum('jaik,kj->ia', ovoo, moo1)
    l2new -= lib.einsum('ka,jbik->ijab', l1, ovoo)
    ovoo = None

    l2theta = l2*2 - l2.transpose(0,1,3,2)
    l2new += lib.einsum('ikac,jbck->ijab', l2theta, imds.wovvo) * .5
    tmp = lib.einsum('ikca,jbck->ijab', l2, imds.woVVo)
    l2new += tmp * .5
    l2new += tmp.transpose(1,0,2,3)
    l2theta = None

    l1new += fov
    l1new += lib.einsum('ib,ba->ia', l1, imds.v1)
    l1new -= lib.einsum('ja,ij->ia', l1, imds.v2)
    l1new += np.einsum('jb,iabj->ia', l1, eris.ovvo) * 2
    l1new -= np.einsum('jb,ijba->ia', l1, eris.oovv)
    l1new -= lib.einsum('ijbc,bacj->ia', l2, imds.wvvvo)
    l1new -= lib.einsum('kjca,ijck->ia', l2, imds.woovo)
    l1new += np.einsum('ijab,bj->ia', l2, imds.w3) * 2
    l1new -= np.einsum('ijba,bj->ia', l2, imds.w3)

    # Denominators come from the diagonals of the foo and fvv blocks.
    eia = lib.direct_sum('i-j->ij', foo.diagonal(), fvv.diagonal())
    l1new /= eia
    l1new += l1

    # Symmetrize under the simultaneous (i<->j, a<->b) swap before dividing.
    l2new = l2new + l2new.transpose(1,0,3,2)
    l2new /= lib.direct_sum('ia+jb->ijab', eia, eia)
    l2new += l2

    time0 = log.timer_debug1('update l1 l2', *time0)
    return l1new, l2new