def eaccsd_star_contract(eom, eaccsd_evals, eaccsd_evecs, leaccsd_evecs, imds=None):
    """Evaluate the EA-CCSD* correction for each (eigenvalue, R, L) triple.

    Args:
        eom: EOM object.  ``eom.partition`` must be None; ``eom.make_imds()``
            is called when ``imds`` is not given.  Also used as the logging
            target.
        eaccsd_evals: A single EA-CCSD eigenvalue or a sequence of them.
        eaccsd_evecs: Right eigenvector(s), one per eigenvalue.
        leaccsd_evecs: Left eigenvector(s), one per eigenvalue.
        imds: Optional intermediates object exposing ``t1``, ``t2`` and
            ``eris``.  ``eris`` must be a ``gccsd._PhysicistsERIs``.

    Returns:
        e_star (list of float):
            The EA-CCSD* energy.

    Notes:
        See `ipccsd_star_contract` for description of arguments.

    Reference:
        Saeh, Stanton "...energy surfaces of radicals" JCP 111, 8275 (1999)
    """
    assert eom.partition is None
    if imds is None:
        imds = eom.make_imds()
    t1, t2 = imds.t1, imds.t2
    eris = imds.eris
    assert isinstance(eris, gccsd._PhysicistsERIs)
    fock = eris.fock
    nocc, nvir = t1.shape
    nmo = nocc + nvir

    # Only the diagonal of the occupied and virtual Fock blocks is used.
    foo = fock[:nocc, :nocc].diagonal()
    fvv = fock[nocc:, nocc:].diagonal()

    vvvv = _cp(eris.vvvv)
    oovv = _cp(eris.oovv)
    ovvv = _cp(eris.ovvv)
    ovov = _cp(eris.ovov)
    ooov = _cp(eris.ooov)
    vooo = _cp(ooov).conj().transpose(3, 2, 1, 0)
    vvvo = _cp(ovvv).conj().transpose(3, 2, 1, 0)

    # Create denominator
    eabc = fvv[:, None, None] + fvv[None, :, None] + fvv[None, None, :]
    eij = foo[:, None] + foo[None, :]
    eijabc = eij[:, :, None, None, None] - eabc[None, None, :, :, :]

    # Permutation operators
    def pabc(tmp):
        '''P(abc)'''
        return (tmp + tmp.transpose(0, 1, 3, 4, 2)
                + tmp.transpose(0, 1, 4, 2, 3))

    def pij(tmp):
        '''P(ij)'''
        return tmp - tmp.transpose(1, 0, 2, 3, 4)

    # np.array() copies the inputs, so the in-place normalisation below
    # never touches the caller's vectors.
    eaccsd_evecs = np.atleast_2d(np.array(eaccsd_evecs))
    leaccsd_evecs = np.atleast_2d(np.array(leaccsd_evecs))
    eaccsd_evals = np.atleast_1d(eaccsd_evals)

    e_star = []
    for ea_eval, ea_evec, ea_levec in zip(eaccsd_evals, eaccsd_evecs,
                                          leaccsd_evecs):
        # Enforcing <L|R> = 1
        l1, l2 = vector_to_amplitudes_ea(ea_levec, nmo, nocc)
        r1, r2 = vector_to_amplitudes_ea(ea_evec, nmo, nocc)
        ldotr = np.dot(l1, r1) + 0.5 * np.dot(l2.ravel(), r2.ravel())

        logger.info(eom, 'Left-right amplitude overlap : %14.8e', ldotr)
        if abs(ldotr) < 1e-7:
            logger.warn(eom, 'Small %s left-right amplitude overlap. Results '
                        'may be inaccurate.', ldotr)

        l1 /= ldotr
        l2 /= ldotr

        # Denominator + eigenvalue(EA-CCSD)
        denom = eijabc + ea_eval
        denom = 1. / denom

        # Left-hand contribution
        tmp = lib.einsum('c,ijab->ijabc', l1, oovv)
        lijabc = -pabc(tmp)
        tmp = lib.einsum('jima,mbc->ijabc', ooov, l2)
        lijabc += -pabc(tmp)
        tmp = lib.einsum('ieab,jce->ijabc', ovvv, l2)
        tmp = pabc(tmp)
        lijabc += -pij(tmp)

        # Right-hand contribution
        tmp = lib.einsum('bcef,f->bce', vvvv, r1)
        tmp = lib.einsum('bce,ijae->ijabc', tmp, t2)
        rijabc = -pabc(tmp)
        tmp = lib.einsum('mcje,e->mcj', ovov, r1)
        tmp = lib.einsum('mcj,imab->ijabc', tmp, t2)
        tmp = pabc(tmp)
        rijabc += pij(tmp)
        tmp = lib.einsum('amij,mcb->ijabc', vooo, r2)
        rijabc += pabc(tmp)
        tmp = lib.einsum('baei,jce->ijabc', vvvo, r2)
        tmp = pabc(tmp)
        rijabc -= pij(tmp)

        deltaE = (1. / 12) * lib.einsum('ijabc,ijabc,ijabc',
                                        lijabc, rijabc, denom)
        deltaE = deltaE.real
        logger.info(eom, "Exc. energy, delta energy = %16.12f, %16.12f",
                    ea_eval + deltaE, deltaE)
        e_star.append(ea_eval + deltaE)
    return e_star
def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           mf_grad=None, verbose=logger.INFO):
    '''Assemble per-atom Cartesian gradient contributions from CC densities.

    Args:
        mycc: coupled-cluster object; supplies ``t1``/``t2``/``l1``/``l2``
            defaults, ``mol``, ``_scf``, ``mo_coeff``, ``mo_energy``,
            ``mo_occ`` and ``frozen``.
        t1, t2, l1, l2: amplitudes; taken from ``mycc`` when None.
        eris: integrals object; ``ccsd._ERIS(mycc)`` is built when None.
        atmlst: atom indices to process; all atoms of ``mycc.mol`` when None.
        mf_grad: mean-field gradient object; ``pyscf.grad.RHF(mycc._scf)``
            when None.
        verbose: accepted for interface compatibility; the logger in this
            function is built from ``mycc.verbose``.

    Returns:
        numpy array of shape ``(len(atmlst), 3)``, one row per atom.

    Raises:
        NotImplementedError: if ``mycc.frozen`` is an integer.
    '''
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2
    if l1 is None:
        l1 = mycc.l1
    if l2 is None:
        l2 = mycc.l2
    if eris is None:
        eris = ccsd._ERIS(mycc)
    if mf_grad is None:
        mf_grad = pyscf.grad.RHF(mycc._scf)

    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock

    log = logger.Logger(mycc.stdout, mycc.verbose)
    time0 = cpu_clock(), time.time()
    mol = mycc.mol
    # The numpy.bool alias was removed from NumPy; the builtin bool gives
    # the same dtype.
    moidx = numpy.ones(mycc.mo_energy.size, dtype=bool)
    if isinstance(mycc.frozen, (int, numpy.integer)):
        raise NotImplementedError('frozen orbital ccsd_grad')
    else:
        moidx[mycc.frozen] = False
    mo_coeff = mycc.mo_coeff[:, moidx]  #FIXME: ensure mycc.mo_coeff is canonical orbital
    mo_energy = mycc.mo_energy[moidx]
    nocc, nvir = t1.shape
    nao, nmo = mo_coeff.shape

    log.debug('Build ccsd rdm1 intermediates')
    d1 = ccsd_t_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2, eris)
    doo, dov, dvo, dvv = d1
    time1 = log.timer('rdm1 intermediates', *time0)

    log.debug('Build ccsd rdm2 intermediates')
    d2 = ccsd_t_rdm.gamma2_intermediates(mycc, t1, t2, l1, l2, eris)
    time1 = log.timer('rdm2 intermediates', *time1)
    log.debug('Build ccsd response_rdm1')
    Ioo, Ivv, Ivo, Xvo = IX_intermediates(mycc, t1, t2, l1, l2, d1, d2, eris)
    time1 = log.timer('response_rdm1 intermediates', *time1)

    dm1mo = ccsd_grad.response_dm1(mycc, t1, t2, l1, l2, eris,
                                   (Ioo, Ivv, Ivo, Xvo))
    dm1mo[:nocc, :nocc] = doo * 2
    dm1mo[nocc:, nocc:] = dvv * 2
    dm1ao = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    im1 = numpy.zeros_like(dm1mo)
    im1[:nocc, :nocc] = Ioo
    im1[nocc:, nocc:] = Ivv
    im1[nocc:, :nocc] = Ivo
    im1[:nocc, nocc:] = Ivo.T
    im1 = reduce(numpy.dot, (mo_coeff, im1, mo_coeff.T))
    time1 = log.timer('response_rdm1', *time1)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    dm2ao = ccsd_grad._rdm2_mo2ao(mycc, d2, dm1mo, mo_coeff)
    time1 = log.timer('MO->AO transformation', *time1)

    #TODO: pass hf_grad object to compute h1 and s1
    log.debug('h1 and JK1')
    h1 = mf_grad.get_hcore(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = numpy.empty((nmo, nmo))
    zeta[:nocc, :nocc] = (mo_energy[:nocc].reshape(-1, 1) + mo_energy[:nocc]) * .5
    zeta[nocc:, nocc:] = (mo_energy[nocc:].reshape(-1, 1) + mo_energy[nocc:]) * .5
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))
    # Projector built from the occupied orbital coefficients.  Named apart
    # from the AO offsets p0/p1 used in the atom loop below.
    occ_proj = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf4sij = reduce(numpy.dot,
                     (occ_proj,
                      mycc._scf.get_veff(mol, dm1ao + dm1ao.T),
                      occ_proj))
    time1 = log.timer('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)
    dm1ao += hf_dm1
    zeta += mf_grad.make_rdm1e(mycc.mo_energy, mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    de = numpy.zeros((len(atmlst), 3))
    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] = (numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
                 + numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1]))
        # h[1] \dot DM, *2 for +c.c., contribute to f1
        vrinv = mf_grad._grad_rinv(mol, ia)
        de[k] += (numpy.einsum('xij,ij->x', h1[:, p0:p1], dm1ao[p0:p1])
                  + numpy.einsum('xji,ij->x', h1[:, p0:p1], dm1ao[:, p0:p1]))
        de[k] += (numpy.einsum('xij,ij->x', vrinv, dm1ao)
                  + numpy.einsum('xji,ij->x', vrinv, dm1ao))
        # -s[1]*e \dot DM, contribute to f1
        de[k] -= (numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1])
                  + numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1]))
        # -vhf[s_ij[1]], contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf4sij[p0:p1]) * 2

        # 2e AO integrals dot 2pdm
        eri1 = gto.moleintor.getints('cint2e_ip1_sph', mol._atm, mol._bas,
                                     mol._env, numpy.arange(shl0, shl1),
                                     comp=3, aosym='s2kl').reshape(
                                         3, p1 - p0, nao, -1)
        dm2buf = ccsd_grad._load_block_tril(dm2ao, p0, p1)
        de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2

        for i in range(3):
            #:tmp = _ccsd.unpack_tril(eri1[i].reshape(-1,nao_pair))
            #:vj = numpy.einsum('ijkl,kl->ij', tmp, hf_dm1)
            #:vk = numpy.einsum('ijkl,jk->il', tmp, hf_dm1)
            vj, vk = ccsd_grad.hf_get_jk_incore(eri1[i], hf_dm1)
            de[k, i] -= (numpy.einsum('ij,ij->', vj, hf_dm1[p0:p1])
                         - numpy.einsum('ij,ij->', vk, hf_dm1[p0:p1]) * .5) * 2
        # Drop references to the large per-atom buffers before the next atom.
        eri1 = dm2buf = None
        log.debug('grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer('grad of atom %d' % ia, *time1)

    log.note('CCSD gradinets')
    log.note('==============')
    log.note(' x y z')
    for k, ia in enumerate(atmlst):
        log.note('%d %s %15.9f %15.9f %15.9f', ia, mol.atom_symbol(ia),
                 de[k, 0], de[k, 1], de[k, 2])
    log.timer('CCSD gradients', *time0)
    return de
def label_symmetry_(mc, mo_coeff, ci0=None):
    '''Tag ``mo_coeff`` with orbital irreps and settle the CI wfn symmetry.

    Side effects on ``mc.fcisolver``: ``orbsym`` is set to the active-space
    irreps when it is missing or empty, and ``wfnsym`` is set when it can be
    derived from the SCF occupation pattern.

    Args:
        mc: CAS object providing ``mol``, ``_scf``, ``ncore``, ``ncas``,
            ``nelecas``, ``fcisolver``, ``stdout`` and ``verbose``.
        mo_coeff: orbital coefficients; symmetrized block-wise (core,
            active, virtual) when irrep labelling raises ValueError.
        ci0: optional CI initial guess, used to guess ``wfnsym`` when the
            solver has none and offers ``guess_wfnsym``.

    Returns:
        ``mo_coeff`` (possibly symmetrized) tagged with ``orbsym``.
    '''
    log = logger.Logger(mc.stdout, mc.verbose)

    def _active_window():
        # (first active index, one past the last active index)
        return mc.ncore, mc.ncore + mc.ncas

    def _xor_fold(irreps):
        # XOR all irrep ids together, starting from 0.
        acc = 0
        for ir in irreps:
            acc ^= ir
        return acc

    #irrep_name = mc.mol.irrep_name
    irrep_name = mc.mol.irrep_id
    ovlp = mc._scf.get_ovlp()
    try:
        orbsym = scf.hf_symm.get_orbsym(mc._scf.mol, mo_coeff, ovlp, True)
    except ValueError:
        log.warn('mc1step_symm symmetrizes input orbitals')
        ncore, nocc = _active_window()
        # Symmetrize core, active and virtual spaces separately so the
        # three subspaces are not mixed.
        blocks = (mo_coeff[:, :ncore],
                  mo_coeff[:, ncore:nocc],
                  mo_coeff[:, nocc:])
        mo_coeff = numpy.hstack(tuple(
            symm.symmetrize_space(mc.mol, blk, s=ovlp, check=False)
            for blk in blocks))
        orbsym = symm.label_orb_symm(mc.mol, irrep_name, mc.mol.symm_orb,
                                     mo_coeff, s=ovlp)
    mo_coeff_with_orbsym = lib.tag_array(mo_coeff, orbsym=orbsym)

    solver = mc.fcisolver
    active_orbsym = getattr(solver, 'orbsym', [])
    has_len = getattr(active_orbsym, '__len__', None)
    if (not has_len) or len(active_orbsym) == 0:
        ncore, nocc = _active_window()
        mc.fcisolver.orbsym = orbsym[ncore:nocc]
    log.debug('Active space irreps %s', str(mc.fcisolver.orbsym))

    wfnsym = 0
    if getattr(mc.fcisolver, 'wfnsym', None) is not None:
        wfnsym = mc.fcisolver.wfnsym

    elif ci0 is None:
        # Guess wfnsym based on HF determinant. mo_coeff may not be HF
        # canonical orbitals. Some checks are needed to ensure that mo_coeff
        # are derived from the symmetry adapted SCF calculations.
        if mo_coeff is mc._scf.mo_coeff:
            wfnsym = _xor_fold(orbsym[mc._scf.mo_occ == 1])
            mc.fcisolver.wfnsym = wfnsym
            log.debug('Set CASCI wfnsym %s based on HF determinant', wfnsym)
        elif getattr(mo_coeff, 'orbsym', None) is not None:
            # It may be reordered SCF orbitals
            ncore, nocc = _active_window()
            cas_orb = mo_coeff[:, ncore:nocc]
            proj = reduce(numpy.dot, (cas_orb.T, mc._scf.get_ovlp(),
                                      mc._scf.mo_coeff))
            # Every active orbital must coincide with one SCF orbital.
            if numpy.all(numpy.max(proj, axis=1) > 1 - 1e-9):
                idx = numpy.argmax(proj, axis=1)
                cas_orbsym = orbsym[ncore:nocc]
                cas_occ = mc._scf.mo_occ[idx]
                wfnsym = _xor_fold(cas_orbsym[cas_occ == 1])
                mc.fcisolver.wfnsym = wfnsym
                log.debug('Active space are constructed from canonical SCF '
                          'orbitals %s', idx)
                log.debug('Set CASCI wfnsym %s based on HF determinant',
                          wfnsym)

    elif getattr(mc.fcisolver, 'guess_wfnsym', None):
        wfnsym = mc.fcisolver.guess_wfnsym(mc.ncas, mc.nelecas, ci0,
                                           verbose=log)
        log.debug('CASCI wfnsym %s (based on CI initial guess)', wfnsym)

    if isinstance(wfnsym, (int, numpy.integer)):
        # Only used for the log line below.
        wfnsym = symm.irrep_id2name(mc.mol.groupname, wfnsym)
    log.info('Active space CI wfn symmetry = %s', wfnsym)

    return mo_coeff_with_orbsym
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the fitted 3-index tensors 'j3c/<n>' in ``cderi_file``.

    Steps, as visible in this function:

    1. ``outcore.aux_e2`` writes the 'int3c2e_sph' integrals for the fused
       auxiliary cell to ``cderi_file`` under the name 'j3c'.
    2. For every unique ``kptj - kpti`` the 2-index metric is corrected with
       Fourier-space products and stored temporarily as 'j2c/<k>'.
    3. For every unique k-point the metric is factorized (Cholesky, falling
       back to an eigen-decomposition that drops eigenvalues not above
       ``mydf.linear_dep_threshold``) and each adapted 'j3c/<ji>' dataset is
       replaced by its contraction with that factor.

    Args:
        mydf: density-fitting object (``gs``, ``max_memory``,
            ``weighted_coulG``, ``auxbar``, ``linear_dep_threshold``,
            ``stdout``, ``verbose`` are read).
        cell: cell holding the orbital basis.
        auxcell: cell holding the auxiliary basis.
        kptij_lst: array whose columns 0 and 1 hold kpti and kptj.
        cderi_file: path of the HDF5 file, modified in place.

    Returns:
        None.
    '''
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock

    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, cderi_file, 'int3c2e_sph', aosym='s2',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)

    # Open read/write explicitly: recent h5py defaults to read-only, which
    # would make every assignment below fail.  The context manager closes
    # the file even if a step raises.
    with h5py.File(cderi_file, 'a') as feri:
        # NOTE: an alternative evaluation of j2c through a modified-charge
        # basis (make_modchg_basis) was tried here historically; it might
        # have larger numerical error.
        for k, kpt in enumerate(uniq_kpts):
            aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
            coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
            LkR = aoaux.real * coulG
            LkI = aoaux.imag * coulG

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            feri['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
            aoaux = LkR = LkI = coulG = None
        j2c = None

        def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
            kpt = uniq_kpts[uniq_kptji_id]
            log.debug1('kpt = %s', kpt)
            adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
            adapted_kptjs = kptjs[adapted_ji_idx]
            nkptj = len(adapted_kptjs)
            log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

            shls_slice = (auxcell.nbas, fused_cell.nbas)
            Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
            Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1,1)
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            j2c = numpy.asarray(feri['j2c/%d'%uniq_kptji_id])
            try:
                j2c = scipy.linalg.cholesky(j2c, lower=True)
                j2ctag = 'CD'
            except scipy.linalg.LinAlgError:
                # Metric is not positive definite: fall back to an
                # eigen-decomposition and drop the small eigenvalues.
                w, v = scipy.linalg.eigh(j2c)
                log.debug('DF metric linear dependency for kpt %s',
                          uniq_kptji_id)
                log.debug('cond = %.4g, drop %d bfns', w[-1]/w[0],
                          numpy.count_nonzero(w<mydf.linear_dep_threshold))
                v = v[:,w>mydf.linear_dep_threshold].T.conj()
                v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
                j2c = v
                j2ctag = 'eig'
            naux0 = j2c.shape[0]

            if is_zero(kpt):  # kpti == kptj
                aosym = 's2'
                nao_pair = nao*(nao+1)//2

                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1,
                                      kpts=adapted_kptjs)
                for k, ji in enumerate(adapted_ji_idx):
                    ovlp[k] = lib.pack_tril(ovlp[k])
            else:
                aosym = 's1'
                nao_pair = nao**2

            mem_now = lib.current_memory()[0]
            log.debug2('memory = %s', mem_now)
            max_memory = max(2000, mydf.max_memory-mem_now)
            # nkptj for 3c-coulomb arrays plus 1 Lpq array
            buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1),
                         nao_pair)
            shranges = _guess_shell_ranges(cell, buflen, aosym)
            buflen = max([x[2] for x in shranges])
            # +1 for a pqkbuf
            if aosym == 's2':
                Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
            else:
                Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
            Gblksize = min(Gblksize, ngs, 16384)
            pqkRbuf = numpy.empty(buflen*Gblksize)
            pqkIbuf = numpy.empty(buflen*Gblksize)
            # buf for ft_aopair
            buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                    if is_zero(kpt):
                        for i, c in enumerate(vbar):
                            if c != 0:
                                v[i] -= c * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        # No imaginary part is kept in this case.
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None

                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice,
                                                aosym, b, gxyz[p0:p1], Gvbase,
                                                kpt, adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = dat[k].reshape(nG,ncol)
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T

                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

                for k, ji in enumerate(adapted_ji_idx):
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        v = fuse(j3cR[k])
                    else:
                        v = fuse(j3cR[k] + j3cI[k] * 1j)
                    if j2ctag == 'CD':
                        v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                          overwrite_b=True)
                    else:
                        v = lib.dot(j2c, v)
                    feri['j3c/%d'%ji][:naux0,col0:col1] = v

            # The metric is no longer needed; shrink each j3c dataset to the
            # naux0 rows that were actually written.
            del(feri['j2c/%d'%uniq_kptji_id])
            for k, ji in enumerate(adapted_ji_idx):
                v = feri['j3c/%d'%ji][:naux0]
                del(feri['j3c/%d'%ji])
                feri['j3c/%d'%ji] = v

        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpts_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    When ``kpts_band`` is given and differs from ``kpt``, the work is
    delegated to ``get_k_kpts`` / ``get_j_kpts``.  Otherwise J and K are
    accumulated block by block over ``mydf.pw_loop``.

    Args:
        mydf: density-fitting object (``cell``, ``gs``, ``max_memory``,
            ``weighted_coulG``, ``pw_loop``, ``stdout``, ``verbose``).
        dm: density matrix or a stack of them; the results are reshaped to
            ``dm.shape``.
        hermi: forwarded to ``get_k_kpts`` / ``get_j_kpts`` only.
        kpt: the k-point.
        kpts_band: optional band k-point(s).
        with_j, with_k: select which of J and K are computed.
        exxdiv: exchange divergence treatment.  On the non-band path it is
            also stored on ``mydf.exxdiv`` when ``with_k`` is true.

    Returns:
        ``(vj, vk)``; an entry is None when the matching ``with_*`` flag is
        false.
    '''
    vj = vk = None
    if kpts_band is not None and abs(kpt - kpts_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1, 3))
        if with_k:
            vk = get_k_kpts(mydf, dm, hermi, kpt, kpts_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, dm, hermi, kpt, kpts_band)
        return vj, vk

    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock

    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = (cpu_clock(), time.time())

    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset, nao, nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)

    kptii = numpy.asarray((kpt, kpt))
    kpt_allow = numpy.zeros(3)

    if with_j:
        vjcoulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs)
        vjR = numpy.zeros((nset, nao, nao))
        vjI = numpy.zeros((nset, nao, nao))
    if with_k:
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt_allow, True, mydf.gs)
        vkR = numpy.zeros((nset, nao, nao))
        vkI = numpy.zeros((nset, nao, nao))
    dmsR = numpy.asarray(dms.real.reshape(nset, nao, nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset, nao, nao), order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    t2 = t1

    # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs))
    # == conj(transpose(rho_sr(G+k_sr), (0,2,1)))
    pLqR = pLqI = None
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, kptii,
                                           max_memory=max_memory):
        t2 = log.timer_debug1('%d:%d ft_aopair' % (p0, p1), *t2)
        pqkR = pqkR.reshape(nao, nao, -1)
        pqkI = pqkI.reshape(nao, nao, -1)
        if with_j:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vj += numpy.einsum('ijkl,lk->ij', v4, dm)
            for i in range(nset):
                rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR)
                rhoR += numpy.einsum('pq,pqk->k', dmsI[i], pqkI)
                rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR)
                rhoI -= numpy.einsum('pq,pqk->k', dmsR[i], pqkI)
                rhoR *= vjcoulG[p0:p1]
                rhoI *= vjcoulG[p0:p1]
                vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR)
                vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI)
                if not j_real:
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI)
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR)

        if with_k:
            # Must run after the J block: pqkR/pqkI are scaled in place and
            # then reused as storage for iLkR/iLkI.
            coulG = numpy.sqrt(vkcoulG[p0:p1])
            pqkR *= coulG
            pqkI *= coulG
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = lib.transpose(pqkR, axes=(0, 2, 1),
                                 out=pLqR).reshape(-1, nao)
            pLqI = lib.transpose(pqkI, axes=(0, 2, 1),
                                 out=pLqI).reshape(-1, nao)
            iLkR = numpy.ndarray((nao * (p1 - p0), nao), buffer=pqkR)
            iLkI = numpy.ndarray((nao * (p1 - p0), nao), buffer=pqkI)
            for i in range(nset):
                if k_real:
                    lib.dot(pLqR, dmsR[i], 1, iLkR)
                    lib.dot(pLqI, dmsR[i], 1, iLkI)
                    lib.dot(iLkR.reshape(nao, -1),
                            pLqR.reshape(nao, -1).T, 1, vkR[i], 1)
                    lib.dot(iLkI.reshape(nao, -1),
                            pLqI.reshape(nao, -1).T, 1, vkR[i], 1)
                else:
                    zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR, iLkI)
                    zdotNC(iLkR.reshape(nao, -1), iLkI.reshape(nao, -1),
                           pLqR.reshape(nao, -1).T, pLqI.reshape(nao, -1).T,
                           1, vkR[i], vkI[i])
        # Drop references to this block's buffers before the next block.
        pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None
    t1 = log.timer_debug1('aft_jk.get_jk', *t1)

    if with_j:
        if j_real:
            vj = vjR
        else:
            vj = vjR + vjI * 1j
        vj = vj.reshape(dm.shape)
    if with_k:
        if k_real:
            vk = vkR
        else:
            vk = vkR + vkI * 1j
        if cell.dimension != 3 and exxdiv:
            assert (exxdiv.lower() == 'ewald')
            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
        vk = vk.reshape(dm.shape)
    return vj, vk
def _make_j3c(mydf, mol, auxmol):
    '''Write the 'j3c' and 'Lpq' datasets to the HDF5 file ``mydf._cderi``.

    Both datasets have shape ``(naux, nao_pair)`` and are filled column
    block by column block over the shell ranges returned by
    ``outcore._guess_shell_ranges``.  'Lpq' comes from ``_make_Lpq`` (or
    ``_make_Lpq_atomic_approx`` when ``mydf.approx_sr_level`` is non-zero);
    'j3c' starts from the 'cint3c2e_sph' integrals and has the j2c/Lpq
    product and the Fourier-space products subtracted.

    Args:
        mydf: density-fitting object (``_cderi``, ``gs``, ``max_memory``,
            ``approx_sr_level``, ``stdout``, ``verbose`` are read).
        mol: molecule holding the orbital basis.
        auxmol: molecule holding the auxiliary basis.

    Returns:
        None.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    atm, bas, env, ao_loc = incore._env_and_aoloc('cint3c2e_sph', mol, auxmol)
    nao = ao_loc[mol.nbas]
    naux = ao_loc[-1] - nao
    nao_pair = nao * (nao+1) // 2
    cintopt = gto.moleintor.make_cintopt(atm, bas, env, 'cint3c2e_sph')
    if mydf.approx_sr_level == 0:
        get_Lpq = _make_Lpq(mydf, mol, auxmol)
    else:
        get_Lpq = _make_Lpq_atomic_approx(mydf, mol, auxmol)

    # Open read/write-or-create explicitly: recent h5py defaults to
    # read-only, which can neither create the file nor add datasets.  The
    # context manager closes the file even if a step raises.
    with h5py.File(mydf._cderi, 'a') as feri:
        chunks = (min(256,naux), min(256,nao_pair))  # 512K
        feri.create_dataset('j3c', (naux,nao_pair), 'f8', chunks=chunks)
        feri.create_dataset('Lpq', (naux,nao_pair), 'f8', chunks=chunks)

        def save(label, dat, col0, col1):
            feri[label][:,col0:col1] = dat

        Gv, Gvbase, kws = non_uniform_kgrids(mydf.gs)
        gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
        kk = numpy.einsum('ki,ki->k', Gv, Gv)
        coulG = .5/numpy.pi**2 * kws / kk

        aoaux = ft_ao.ft_ao(auxmol, Gv, None, numpy.eye(3), gxyz, Gvbase)
        kLR = numpy.asarray(aoaux.real, order='C')
        kLI = numpy.asarray(aoaux.imag, order='C')
        j2c = auxmol.intor('cint2c2e_sph', hermi=1).T  # .T to C-order
        lib.dot(kLR.T*coulG, kLR, -1, j2c, 1)
        lib.dot(kLI.T*coulG, kLI, -1, j2c, 1)

        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        # Drop references to arrays that are no longer needed.
        aoaux = coulG = kk = kws = None

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        buflen = min(max(int(max_memory*.3*1e6/8/naux), 1), nao_pair)
        shranges = outcore._guess_shell_ranges(mol, buflen, 's2ij')
        buflen = max([x[2] for x in shranges])
        blksize = max(16, int(max_memory*.15*1e6/16/buflen))
        blksize = min(blksize, Gv.shape[0], 16384)
        pqkbuf = numpy.empty(buflen*blksize)
        bufs1 = numpy.empty((buflen*naux))
        # bufs2 holds either Lpq or ft_aopair
        bufs2 = numpy.empty(max(buflen*(naux+1),buflen*blksize*2))  # *2 for cmplx

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                      istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            shls_slice = (bstart, bend, 0, bend,
                          mol.nbas, mol.nbas+auxmol.nbas)

            # Lpq must be saved and consumed before bufs2 is reused by
            # ft_aopair below.
            Lpq = get_Lpq(shls_slice, col0, col1, bufs2)
            save('Lpq', Lpq, col0, col1)

            j3c = _ri.nr_auxe2('cint3c2e_sph', atm, bas, env, shls_slice,
                               ao_loc, 's2ij', 1, cintopt, bufs1)
            j3c = j3c.T  # -> (L|pq) in C-order
            lib.dot(j2c, Lpq, -.5, j3c, 1)
            Lpq = None

            for p0, p1 in lib.prange(0, Gv.shape[0], blksize):
                aoao = ft_ao.ft_aopair(mol, Gv[p0:p1], shls_slice[:4], 's2',
                                       numpy.eye(3), gxyz[p0:p1], Gvbase,
                                       buf=bufs2)
                nG = p1 - p0
                # pqkR and pqkI share pqkbuf: the real part must be
                # consumed before the imaginary part overwrites it.
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkbuf)
                pqkR[:] = aoao.real.T
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3c, 1)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkbuf)
                pqkI[:] = aoao.imag.T
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3c, 1)
                aoao = None
            save('j3c', j3c, col0, col1)
def davidson_cc(h_op, g_op, precond, x0, tol=1e-10, xs=[], ax=[],
                max_cycle=30, lindep=1e-14, verbose=logger.WARN):
    '''Generator running Davidson-style steps on an augmented problem.

    A small matrix is built whose row/column 0 holds the projections of the
    vector returned by ``g_op()`` and whose remaining block holds the
    projections of ``h_op`` onto the trial vectors.  Each cycle calls
    ``_regular_step`` on that matrix and yields the outcome.

    Args:
        h_op: callable applied to each new trial vector.
        g_op: callable without arguments, evaluated once per cycle.
        precond: callable ``precond(dx, w)`` giving the next trial vector.
        x0: first trial vector, used only when ``xs`` is empty.
        tol: threshold on the change of ``w_t``; ``sqrt(tol)`` is the
            threshold on ``|dx|``.
        xs, ax: initial trial vectors and their ``h_op`` images.  Both are
            copied into new lists; the caller's sequences are not modified.
        max_cycle: maximum number of cycles.
        lindep: threshold on ``seig[0]`` and on ``|dx|`` used to stop.
        verbose: a ``logger.Logger`` or a verbosity level.

    Yields:
        ``(converged, cycle, w_t, xtrial, hx, dx, s0)``; ``cycle`` is
        1-based.
    '''
    log = (verbose if isinstance(verbose, logger.Logger)
           else logger.Logger(sys.stdout, verbose))

    loose_tol = numpy.sqrt(tol)
    # the first trial vector is (1,0,0,...), which is not included in xs
    xs = list(xs)
    ax = list(ax)
    if len(xs) == 0:
        xs.append(x0)
        ax.append(h_op(x0))
    nx = len(xs)

    dim = max_cycle + nx + 1
    heff = numpy.zeros((dim, dim), dtype=x0.dtype)
    ovlp = numpy.eye(dim, dtype=x0.dtype)
    w_t = 0
    for cycle in range(1, max_cycle + 1):
        g = g_op()
        nx = len(xs)
        newest = xs[nx - 1].conj()
        # Fill the g column plus the row of the most recent trial vector;
        # the Hermitian counterparts are mirrored right after.
        for i in range(nx):
            heff[i + 1, 0] = numpy.dot(xs[i].conj(), g)
            heff[nx, i + 1] = numpy.dot(newest, ax[i])
            ovlp[nx, i + 1] = numpy.dot(newest, xs[i])
        heff[0, :nx + 1] = heff[:nx + 1, 0].conj()
        heff[:nx, nx] = heff[nx, :nx].conj()
        ovlp[:nx, nx] = ovlp[nx, :nx].conj()
        nvec = nx + 1

        w_prev = w_t
        xtrial, w_t, v_t, index, seig = _regular_step(
            heff[:nvec, :nvec], ovlp[:nvec, :nvec], xs, lindep, log)
        s0 = seig[0]
        hx = _dgemv(v_t[1:], ax)
        # note g*v_t[0], as the first trial vector is (1,0,0,...)
        dx = hx + g * v_t[0] - w_t * v_t[0] * xtrial
        norm_dx = numpy.linalg.norm(dx)
        log.debug1('... AH step %d index= %d |dx|= %.5g eig= %.5g v[0]= %.5g lindep= %.5g',
                   cycle, index, norm_dx, w_t, v_t[0], s0)
        hx *= 1 / v_t[0]  # == h_op(xtrial)

        eig_settled = abs(w_t - w_prev) < tol and norm_dx < loose_tol
        if eig_settled or s0 < lindep:
            # Avoid adding more trial vectors if hessian converged
            yield True, cycle, w_t, xtrial, hx, dx, s0
            if s0 < lindep or norm_dx < lindep:
                # stop the iteration because eigenvectors would be barely
                # updated
                break
        else:
            yield False, cycle, w_t, xtrial, hx, dx, s0
            x_new = precond(dx, w_t)
            xs.append(x_new)
            ax.append(h_op(x_new))
def detect_symm(atoms, basis=None, verbose=logger.WARN):
    '''Detect the point group symmetry for given molecule.

    Return group name, charge center, and new_axis (three rows for x,y,z)
    '''
    log = (verbose if isinstance(verbose, logger.Logger)
           else logger.Logger(sys.stdout, verbose))

    tol = TOLERANCE / numpy.sqrt(1 + len(atoms))
    decimals = int(-numpy.log10(tol))
    log.debug('geometry tol = %g', tol)

    rawsys = SymmSys(atoms, basis)
    w1, u1 = rawsys.cartesian_tensor(1)
    axes = u1.T
    log.debug('principal inertia moments %s', w1)

    # All moments vanish.
    if numpy.allclose(w1, 0, atol=tol):
        return 'SO3', rawsys.charge_center, numpy.eye(3)

    # linear molecule
    if numpy.allclose(w1[:2], 0, atol=tol):
        gpname = 'Dooh' if rawsys.has_icenter() else 'Coov'
        return gpname, rawsys.charge_center, axes

    w1_degeneracy = _degeneracy(w1, decimals)
    n = None
    c2x = None
    mirrorx = None
    if 3 in w1_degeneracy:  # T, O, I
        # Because rotation vectors Rx Ry Rz are 3-degenerated representation
        # See http://www.webqc.org/symmetrypointgroup-td.html
        w2, u2 = rawsys.cartesian_tensor(2)
        w3, u3 = rawsys.cartesian_tensor(3)
        w2_degeneracy = _degeneracy(w2, decimals)
        w3_degeneracy = _degeneracy(w3, decimals)
        log.debug('2d tensor %s', w2)
        log.debug('3d tensor %s', w3)
        looks_icosahedral = (5 in w2_degeneracy and 4 in w3_degeneracy
                             and len(w3_degeneracy) == 3)
        if looks_icosahedral:  # I group
            gpname, new_axes = _search_i_group(rawsys)
            if gpname is not None:
                return gpname, rawsys.charge_center, _refine(new_axes)
        elif 3 in w2_degeneracy and len(w2_degeneracy) <= 3:  # T/O group
            gpname, new_axes = _search_ot_group(rawsys)
            if gpname is not None:
                return gpname, rawsys.charge_center, _refine(new_axes)
    elif 2 in w1_degeneracy:
        if numpy.allclose(w1[1], w1[2], atol=tol):
            axes = axes[[1, 2, 0]]
        n = rawsys.search_c_highest(axes[2])[1]
        if n == 1:
            n = None
        else:
            c2x = rawsys.search_c2x(axes[2], n)
            mirrorx = rawsys.search_mirrorx(axes[2], n)
    else:
        n = -1  # tag as D2h and subgroup

    # They must not be I/O/T group, at most one C3 or higher rotation axis
    if n is None:
        zaxis, n = rawsys.search_c_highest()
        if n > 1:
            c2x = rawsys.search_c2x(zaxis, n)
            mirrorx = rawsys.search_mirrorx(zaxis, n)
            if c2x is not None:
                axes = _make_axes(zaxis, c2x)
            elif mirrorx is not None:
                axes = _make_axes(zaxis, mirrorx)
            else:
                # Pair zaxis with the first Cartesian unit vector that is
                # not parallel to it.
                for axis in numpy.eye(3):
                    if not parallel_vectors(axis, zaxis):
                        axes = _make_axes(zaxis, axis)
                        break
        else:  # Ci or Cs or C1 with degenerated w1
            mirror = rawsys.search_mirrorx(None, 1)
            if mirror is not None:
                xaxis = numpy.array((1., 0., 0.))
                axes = _make_axes(mirror, xaxis)
            else:
                axes = numpy.eye(3)

    log.debug('Highest C_n = C%d', n)
    if n >= 2:
        if c2x is not None:
            if rawsys.has_mirror(axes[2]):
                gpname = 'D%dh' % n
            elif rawsys.has_improper_rotation(axes[2], n):
                gpname = 'D%dd' % n
            else:
                gpname = 'D%d' % n
            # The result is not used below; the call is retained as is.
            yaxis = numpy.cross(axes[2], c2x)
            axes = _make_axes(axes[2], c2x)
        elif mirrorx is not None:
            gpname = 'C%dv' % n
            axes = _make_axes(axes[2], mirrorx)
        elif rawsys.has_mirror(axes[2]):
            gpname = 'C%dh' % n
        elif rawsys.has_improper_rotation(axes[2], n):
            gpname = 'S%d' % (n * 2)
        else:
            gpname = 'C%d' % n
        return gpname, rawsys.charge_center, _refine(axes)

    # Reached when n < 2: test the three current axes for C2 rotations.
    is_c2x = rawsys.has_rotation(axes[0], 2)
    is_c2y = rawsys.has_rotation(axes[1], 2)
    is_c2z = rawsys.has_rotation(axes[2], 2)
    # rotate to old axes, as close as possible?
    if is_c2z and is_c2x and is_c2y:
        gpname = 'D2h' if rawsys.has_icenter() else 'D2'
        axes = alias_axes(axes, numpy.eye(3))
    elif is_c2z or is_c2x or is_c2y:
        # Cycle the rows so that the C2 axis becomes axes[2].
        if is_c2x:
            axes = axes[[1, 2, 0]]
        if is_c2y:
            axes = axes[[2, 0, 1]]
        if rawsys.has_mirror(axes[2]):
            gpname = 'C2h'
        elif rawsys.has_mirror(axes[0]):
            gpname = 'C2v'
        else:
            gpname = 'C2'
    elif rawsys.has_icenter():
        gpname = 'Ci'
    elif rawsys.has_mirror(axes[0]):
        gpname = 'Cs'
        axes = axes[[1, 2, 0]]
    elif rawsys.has_mirror(axes[1]):
        gpname = 'Cs'
        axes = axes[[2, 0, 1]]
    elif rawsys.has_mirror(axes[2]):
        gpname = 'Cs'
    else:
        gpname = 'C1'
    return gpname, rawsys.charge_center, axes
def update_amps(cc, t1, t2, eris):
    '''Compute updated T1 and T2 amplitudes from the current ones.

    Args:
        cc: solver object; ``stdout``, ``verbose``, ``max_memory`` and
            ``_add_vvvv`` are used.
        t1: pair ``(t1a, t1b)`` of singles amplitudes (presumably the
            alpha and beta spin blocks).
        t2: triple ``(t2aa, t2ab, t2bb)`` of doubles amplitudes;
            ``t2ab.shape`` is read as ``(nocca, noccb, nvira, nvirb)``.
        eris: integral container providing ``mo_energy``, ``focka``,
            ``fockb``, the ``get_ovvv``-style block readers and the
            integral arrays referenced below.

    Returns:
        ``(t1new, t2new)`` with ``t1new = (u1a, u1b)`` and
        ``t2new = (u2aa, u2ab, u2bb)``, each already divided by the
        corresponding orbital-energy denominator.
    '''
    # time.clock() was removed in Python 3.8; prefer process_time() and
    # only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)

    t1a, t1b = t1
    t2aa, t2ab, t2bb = t2
    nocca, noccb, nvira, nvirb = t2ab.shape
    mo_ea_o = eris.mo_energy[0][:nocca]
    mo_ea_v = eris.mo_energy[0][nocca:]
    mo_eb_o = eris.mo_energy[1][:noccb]
    mo_eb_v = eris.mo_energy[1][noccb:]
    fova = eris.focka[:nocca, nocca:]
    fovb = eris.fockb[:noccb, noccb:]

    u1a = np.zeros_like(t1a)
    u1b = np.zeros_like(t1b)
    #:eris_vvvv = ao2mo.restore(1, np.asarray(eris.vvvv), nvirb)
    #:eris_VVVV = ao2mo.restore(1, np.asarray(eris.VVVV), nvirb)
    #:eris_vvVV = _restore(np.asarray(eris.vvVV), nvira, nvirb)
    #:u2aa += lib.einsum('ijef,aebf->ijab', tauaa, eris_vvvv) * .5
    #:u2bb += lib.einsum('ijef,aebf->ijab', taubb, eris_VVVV) * .5
    #:u2ab += lib.einsum('iJeF,aeBF->iJaB', tauab, eris_vvVV)
    tauaa, tauab, taubb = make_tau(t2, t1, t1)
    u2aa, u2ab, u2bb = cc._add_vvvv(None, (tauaa, tauab, taubb), eris)
    u2aa *= .5
    u2bb *= .5

    Fooa = .5 * lib.einsum('me,ie->mi', fova, t1a)
    Foob = .5 * lib.einsum('me,ie->mi', fovb, t1b)
    Fvva = -.5 * lib.einsum('me,ma->ae', fova, t1a)
    Fvvb = -.5 * lib.einsum('me,ma->ae', fovb, t1b)
    # Only the part of the Fock blocks beyond diag(mo_energy) enters here;
    # the mo_energy values are applied as denominators at the end.
    Fooa += eris.focka[:nocca, :nocca] - np.diag(mo_ea_o)
    Foob += eris.fockb[:noccb, :noccb] - np.diag(mo_eb_o)
    Fvva += eris.focka[nocca:, nocca:] - np.diag(mo_ea_v)
    Fvvb += eris.fockb[noccb:, noccb:] - np.diag(mo_eb_v)
    dtype = u2aa.dtype
    wovvo = np.zeros((nocca, nvira, nvira, nocca), dtype=dtype)
    wOVVO = np.zeros((noccb, nvirb, nvirb, noccb), dtype=dtype)
    woVvO = np.zeros((nocca, nvirb, nvira, noccb), dtype=dtype)
    woVVo = np.zeros((nocca, nvirb, nvirb, nocca), dtype=dtype)
    wOvVo = np.zeros((noccb, nvira, nvirb, nocca), dtype=dtype)
    wOvvO = np.zeros((noccb, nvira, nvira, noccb), dtype=dtype)

    # The ovvv-type integrals are read in occupied-index blocks whose size
    # is derived from the memory still available.
    mem_now = lib.current_memory()[0]
    max_memory = max(0, cc.max_memory - mem_now)
    if nvira > 0 and nocca > 0:
        blksize = max(ccsd.BLKMIN,
                      int(max_memory * 1e6 / 8 / (nvira**3 * 3 + 1)))
        for p0, p1 in lib.prange(0, nocca, blksize):
            ovvv = eris.get_ovvv(slice(p0, p1))  # ovvv = eris.ovvv[p0:p1]
            ovvv = ovvv - ovvv.transpose(0, 3, 2, 1)
            Fvva += np.einsum('mf,mfae->ae', t1a[p0:p1], ovvv)
            wovvo[p0:p1] += lib.einsum('jf,mebf->mbej', t1a, ovvv)
            u1a += 0.5 * lib.einsum('mief,meaf->ia', t2aa[p0:p1], ovvv)
            u2aa[:, p0:p1] += lib.einsum('ie,mbea->imab', t1a, ovvv.conj())
            tmp1aa = lib.einsum('ijef,mebf->ijmb', tauaa, ovvv)
            u2aa -= lib.einsum('ijmb,ma->ijab', tmp1aa, t1a[p0:p1] * .5)
            ovvv = tmp1aa = None

    if nvirb > 0 and noccb > 0:
        blksize = max(ccsd.BLKMIN,
                      int(max_memory * 1e6 / 8 / (nvirb**3 * 3 + 1)))
        for p0, p1 in lib.prange(0, noccb, blksize):
            OVVV = eris.get_OVVV(slice(p0, p1))  # OVVV = eris.OVVV[p0:p1]
            OVVV = OVVV - OVVV.transpose(0, 3, 2, 1)
            Fvvb += np.einsum('mf,mfae->ae', t1b[p0:p1], OVVV)
            wOVVO[p0:p1] = lib.einsum('jf,mebf->mbej', t1b, OVVV)
            u1b += 0.5 * lib.einsum('MIEF,MEAF->IA', t2bb[p0:p1], OVVV)
            u2bb[:, p0:p1] += lib.einsum('ie,mbea->imab', t1b, OVVV.conj())
            tmp1bb = lib.einsum('ijef,mebf->ijmb', taubb, OVVV)
            u2bb -= lib.einsum('ijmb,ma->ijab', tmp1bb, t1b[p0:p1] * .5)
            OVVV = tmp1bb = None

    if nvirb > 0 and nocca > 0:
        blksize = max(ccsd.BLKMIN,
                      int(max_memory * 1e6 / 8 / (nvira * nvirb**2 * 3 + 1)))
        for p0, p1 in lib.prange(0, nocca, blksize):
            ovVV = eris.get_ovVV(slice(p0, p1))  # ovVV = eris.ovVV[p0:p1]
            Fvvb += np.einsum('mf,mfAE->AE', t1a[p0:p1], ovVV)
            woVvO[p0:p1] = lib.einsum('JF,meBF->mBeJ', t1b, ovVV)
            woVVo[p0:p1] = lib.einsum('jf,mfBE->mBEj', -t1a, ovVV)
            u1b += lib.einsum('mIeF,meAF->IA', t2ab[p0:p1], ovVV)
            u2ab[p0:p1] += lib.einsum('IE,maEB->mIaB', t1b, ovVV.conj())
            tmp1ab = lib.einsum('iJeF,meBF->iJmB', tauab, ovVV)
            u2ab -= lib.einsum('iJmB,ma->iJaB', tmp1ab, t1a[p0:p1])
            ovVV = tmp1ab = None

    if nvira > 0 and noccb > 0:
        blksize = max(ccsd.BLKMIN,
                      int(max_memory * 1e6 / 8 / (nvirb * nvira**2 * 3 + 1)))
        for p0, p1 in lib.prange(0, noccb, blksize):
            OVvv = eris.get_OVvv(slice(p0, p1))  # OVvv = eris.OVvv[p0:p1]
            Fvva += np.einsum('MF,MFae->ae', t1b[p0:p1], OVvv)
            wOvVo[p0:p1] = lib.einsum('jf,MEbf->MbEj', t1a, OVvv)
            wOvvO[p0:p1] = lib.einsum('JF,MFbe->MbeJ', -t1b, OVvv)
            u1a += lib.einsum('iMfE,MEaf->ia', t2ab[:, p0:p1], OVvv)
            u2ab[:, p0:p1] += lib.einsum('ie,MBea->iMaB', t1a, OVvv.conj())
            tmp1abba = lib.einsum('iJeF,MFbe->iJbM', tauab, OVvv)
            u2ab -= lib.einsum('iJbM,MA->iJbA', tmp1abba, t1b[p0:p1])
            OVvv = tmp1abba = None

    eris_ovov = np.asarray(eris.ovov)
    eris_ovoo = np.asarray(eris.ovoo)
    Woooo = lib.einsum('je,nemi->mnij', t1a, eris_ovoo)
    Woooo = Woooo - Woooo.transpose(0, 1, 3, 2)
    Woooo += np.asarray(eris.oooo).transpose(0, 2, 1, 3)
    Woooo += lib.einsum('ijef,menf->mnij', tauaa, eris_ovov) * .5
    u2aa += lib.einsum('mnab,mnij->ijab', tauaa, Woooo * .5)
    Woooo = tauaa = None
    ovoo = eris_ovoo - eris_ovoo.transpose(2, 1, 0, 3)
    Fooa += np.einsum('ne,nemi->mi', t1a, ovoo)
    u1a += 0.5 * lib.einsum('mnae,meni->ia', t2aa, ovoo)
    wovvo += lib.einsum('nb,nemj->mbej', t1a, ovoo)
    ovoo = eris_ovoo = None

    tilaa = make_tau_aa(t2[0], t1a, t1a, fac=0.5)
    ovov = eris_ovov - eris_ovov.transpose(0, 3, 2, 1)
    Fvva -= .5 * lib.einsum('mnaf,menf->ae', tilaa, ovov)
    Fooa += .5 * lib.einsum('inef,menf->mi', tilaa, ovov)
    Fova = np.einsum('nf,menf->me', t1a, ovov)
    u2aa += ovov.conj().transpose(0, 2, 1, 3) * .5
    wovvo -= 0.5 * lib.einsum('jnfb,menf->mbej', t2aa, ovov)
    woVvO += 0.5 * lib.einsum('nJfB,menf->mBeJ', t2ab, ovov)
    tmpaa = lib.einsum('jf,menf->mnej', t1a, ovov)
    wovvo -= lib.einsum('nb,mnej->mbej', t1a, tmpaa)
    # Release the integrals; eris_ovov is not read again below.
    eris_ovov = ovov = tmpaa = tilaa = None

    eris_OVOV = np.asarray(eris.OVOV)
    eris_OVOO = np.asarray(eris.OVOO)
    WOOOO = lib.einsum('je,nemi->mnij', t1b, eris_OVOO)
    WOOOO = WOOOO - WOOOO.transpose(0, 1, 3, 2)
    WOOOO += np.asarray(eris.OOOO).transpose(0, 2, 1, 3)
    WOOOO += lib.einsum('ijef,menf->mnij', taubb, eris_OVOV) * .5
    u2bb += lib.einsum('mnab,mnij->ijab', taubb, WOOOO * .5)
    WOOOO = taubb = None
    OVOO = eris_OVOO - eris_OVOO.transpose(2, 1, 0, 3)
    Foob += np.einsum('ne,nemi->mi', t1b, OVOO)
    u1b += 0.5 * lib.einsum('mnae,meni->ia', t2bb, OVOO)
    wOVVO += lib.einsum('nb,nemj->mbej', t1b, OVOO)
    OVOO = eris_OVOO = None

    tilbb = make_tau_aa(t2[2], t1b, t1b, fac=0.5)
    OVOV = eris_OVOV - eris_OVOV.transpose(0, 3, 2, 1)
    Fvvb -= .5 * lib.einsum('MNAF,MENF->AE', tilbb, OVOV)
    Foob += .5 * lib.einsum('inef,menf->mi', tilbb, OVOV)
    Fovb = np.einsum('nf,menf->me', t1b, OVOV)
    u2bb += OVOV.conj().transpose(0, 2, 1, 3) * .5
    wOVVO -= 0.5 * lib.einsum('jnfb,menf->mbej', t2bb, OVOV)
    wOvVo += 0.5 * lib.einsum('jNbF,MENF->MbEj', t2ab, OVOV)
    tmpbb = lib.einsum('jf,menf->mnej', t1b, OVOV)
    wOVVO -= lib.einsum('nb,mnej->mbej', t1b, tmpbb)
    eris_OVOV = OVOV = tmpbb = tilbb = None

    eris_OVoo = np.asarray(eris.OVoo)
    eris_ovOO = np.asarray(eris.ovOO)
    Fooa += np.einsum('NE,NEmi->mi', t1b, eris_OVoo)
    u1a -= lib.einsum('nMaE,MEni->ia', t2ab, eris_OVoo)
    wOvVo -= lib.einsum('nb,MEnj->MbEj', t1a, eris_OVoo)
    woVVo += lib.einsum('NB,NEmj->mBEj', t1b, eris_OVoo)
    Foob += np.einsum('ne,neMI->MI', t1a, eris_ovOO)
    u1b -= lib.einsum('mNeA,meNI->IA', t2ab, eris_ovOO)
    woVvO -= lib.einsum('NB,meNJ->mBeJ', t1b, eris_ovOO)
    wOvvO += lib.einsum('nb,neMJ->MbeJ', t1a, eris_ovOO)
    WoOoO = lib.einsum('JE,NEmi->mNiJ', t1b, eris_OVoo)
    WoOoO += lib.einsum('je,neMI->nMjI', t1a, eris_ovOO)
    WoOoO += np.asarray(eris.ooOO).transpose(0, 2, 1, 3)
    eris_OVoo = eris_ovOO = None

    eris_ovOV = np.asarray(eris.ovOV)
    WoOoO += lib.einsum('iJeF,meNF->mNiJ', tauab, eris_ovOV)
    u2ab += lib.einsum('mNaB,mNiJ->iJaB', tauab, WoOoO)
    WoOoO = None

    tilab = make_tau_ab(t2[1], t1, t1, fac=0.5)
    Fvva -= lib.einsum('mNaF,meNF->ae', tilab, eris_ovOV)
    Fvvb -= lib.einsum('nMfA,nfME->AE', tilab, eris_ovOV)
    Fooa += lib.einsum('iNeF,meNF->mi', tilab, eris_ovOV)
    Foob += lib.einsum('nIfE,nfME->MI', tilab, eris_ovOV)
    Fova += np.einsum('NF,meNF->me', t1b, eris_ovOV)
    Fovb += np.einsum('nf,nfME->ME', t1a, eris_ovOV)
    u2ab += eris_ovOV.conj().transpose(0, 2, 1, 3)
    wovvo += 0.5 * lib.einsum('jNbF,meNF->mbej', t2ab, eris_ovOV)
    wOVVO += 0.5 * lib.einsum('nJfB,nfME->MBEJ', t2ab, eris_ovOV)
    wOvVo -= 0.5 * lib.einsum('jnfb,nfME->MbEj', t2aa, eris_ovOV)
    woVvO -= 0.5 * lib.einsum('JNFB,meNF->mBeJ', t2bb, eris_ovOV)
    woVVo += 0.5 * lib.einsum('jNfB,mfNE->mBEj', t2ab, eris_ovOV)
    wOvvO += 0.5 * lib.einsum('nJbF,neMF->MbeJ', t2ab, eris_ovOV)
    tmpabab = lib.einsum('JF,meNF->mNeJ', t1b, eris_ovOV)
    tmpbaba = lib.einsum('jf,nfME->MnEj', t1a, eris_ovOV)
    woVvO -= lib.einsum('NB,mNeJ->mBeJ', t1b, tmpabab)
    wOvVo -= lib.einsum('nb,MnEj->MbEj', t1a, tmpbaba)
    woVVo += lib.einsum('NB,NmEj->mBEj', t1b, tmpbaba)
    wOvvO += lib.einsum('nb,nMeJ->MbeJ', t1a, tmpabab)
    tmpabab = tmpbaba = tilab = None

    Fova += fova
    Fovb += fovb
    u1a += fova.conj()
    u1a += np.einsum('ie,ae->ia', t1a, Fvva)
    u1a -= np.einsum('ma,mi->ia', t1a, Fooa)
    u1a -= np.einsum('imea,me->ia', t2aa, Fova)
    u1a += np.einsum('iMaE,ME->ia', t2ab, Fovb)
    u1b += fovb.conj()
    u1b += np.einsum('ie,ae->ia', t1b, Fvvb)
    u1b -= np.einsum('ma,mi->ia', t1b, Foob)
    u1b -= np.einsum('imea,me->ia', t2bb, Fovb)
    u1b += np.einsum('mIeA,me->IA', t2ab, Fova)

    eris_oovv = np.asarray(eris.oovv)
    eris_ovvo = np.asarray(eris.ovvo)
    wovvo -= eris_oovv.transpose(0, 2, 3, 1)
    wovvo += eris_ovvo.transpose(0, 2, 1, 3)
    oovv = eris_oovv - eris_ovvo.transpose(0, 3, 2, 1)
    u1a -= np.einsum('nf,niaf->ia', t1a, oovv)
    tmp1aa = lib.einsum('ie,mjbe->mbij', t1a, oovv)
    u2aa += 2 * lib.einsum('ma,mbij->ijab', t1a, tmp1aa)
    eris_ovvo = eris_oovv = oovv = tmp1aa = None

    eris_OOVV = np.asarray(eris.OOVV)
    eris_OVVO = np.asarray(eris.OVVO)
    wOVVO -= eris_OOVV.transpose(0, 2, 3, 1)
    wOVVO += eris_OVVO.transpose(0, 2, 1, 3)
    OOVV = eris_OOVV - eris_OVVO.transpose(0, 3, 2, 1)
    u1b -= np.einsum('nf,niaf->ia', t1b, OOVV)
    tmp1bb = lib.einsum('ie,mjbe->mbij', t1b, OOVV)
    u2bb += 2 * lib.einsum('ma,mbij->ijab', t1b, tmp1bb)
    eris_OVVO = eris_OOVV = OOVV = tmp1bb = None

    eris_ooVV = np.asarray(eris.ooVV)
    eris_ovVO = np.asarray(eris.ovVO)
    woVVo -= eris_ooVV.transpose(0, 2, 3, 1)
    woVvO += eris_ovVO.transpose(0, 2, 1, 3)
    u1b += np.einsum('nf,nfAI->IA', t1a, eris_ovVO)
    tmp1ab = lib.einsum('ie,meBJ->mBiJ', t1a, eris_ovVO)
    tmp1ab += lib.einsum('IE,mjBE->mBjI', t1b, eris_ooVV)
    u2ab -= lib.einsum('ma,mBiJ->iJaB', t1a, tmp1ab)
    eris_ooVV = eris_ovVO = tmp1ab = None

    eris_OOvv = np.asarray(eris.OOvv)
    eris_OVvo = np.asarray(eris.OVvo)
    wOvvO -= eris_OOvv.transpose(0, 2, 3, 1)
    wOvVo += eris_OVvo.transpose(0, 2, 1, 3)
    u1a += np.einsum('NF,NFai->ia', t1b, eris_OVvo)
    tmp1ba = lib.einsum('IE,MEbj->MbIj', t1b, eris_OVvo)
    tmp1ba += lib.einsum('ie,MJbe->MbJi', t1a, eris_OOvv)
    u2ab -= lib.einsum('MA,MbIj->jIbA', t1b, tmp1ba)
    eris_OOvv = eris_OVvo = tmp1ba = None

    u2aa += 2 * lib.einsum('imae,mbej->ijab', t2aa, wovvo)
    u2aa += 2 * lib.einsum('iMaE,MbEj->ijab', t2ab, wOvVo)
    u2bb += 2 * lib.einsum('imae,mbej->ijab', t2bb, wOVVO)
    u2bb += 2 * lib.einsum('mIeA,mBeJ->IJAB', t2ab, woVvO)
    u2ab += lib.einsum('imae,mBeJ->iJaB', t2aa, woVvO)
    u2ab += lib.einsum('iMaE,MBEJ->iJaB', t2ab, wOVVO)
    u2ab += lib.einsum('iMeA,MbeJ->iJbA', t2ab, wOvvO)
    u2ab += lib.einsum('IMAE,MbEj->jIbA', t2bb, wOvVo)
    u2ab += lib.einsum('mIeA,mbej->jIbA', t2ab, wovvo)
    u2ab += lib.einsum('mIaE,mBEj->jIaB', t2ab, woVVo)
    wovvo = wOVVO = woVvO = wOvVo = woVVo = wOvvO = None

    Ftmpa = Fvva - .5 * lib.einsum('mb,me->be', t1a, Fova)
    Ftmpb = Fvvb - .5 * lib.einsum('mb,me->be', t1b, Fovb)
    u2aa += lib.einsum('ijae,be->ijab', t2aa, Ftmpa)
    u2bb += lib.einsum('ijae,be->ijab', t2bb, Ftmpb)
    u2ab += lib.einsum('iJaE,BE->iJaB', t2ab, Ftmpb)
    u2ab += lib.einsum('iJeA,be->iJbA', t2ab, Ftmpa)
    Ftmpa = Fooa + 0.5 * lib.einsum('je,me->mj', t1a, Fova)
    Ftmpb = Foob + 0.5 * lib.einsum('je,me->mj', t1b, Fovb)
    u2aa -= lib.einsum('imab,mj->ijab', t2aa, Ftmpa)
    u2bb -= lib.einsum('imab,mj->ijab', t2bb, Ftmpb)
    u2ab -= lib.einsum('iMaB,MJ->iJaB', t2ab, Ftmpb)
    u2ab -= lib.einsum('mIaB,mj->jIaB', t2ab, Ftmpa)

    eris_ovoo = np.asarray(eris.ovoo).conj()
    eris_OVOO = np.asarray(eris.OVOO).conj()
    eris_OVoo = np.asarray(eris.OVoo).conj()
    eris_ovOO = np.asarray(eris.ovOO).conj()
    ovoo = eris_ovoo - eris_ovoo.transpose(2, 1, 0, 3)
    OVOO = eris_OVOO - eris_OVOO.transpose(2, 1, 0, 3)
    u2aa -= lib.einsum('ma,jbim->ijab', t1a, ovoo)
    u2bb -= lib.einsum('ma,jbim->ijab', t1b, OVOO)
    u2ab -= lib.einsum('ma,JBim->iJaB', t1a, eris_OVoo)
    u2ab -= lib.einsum('MA,ibJM->iJbA', t1b, eris_ovOO)
    eris_ovoo = eris_OVoo = eris_OVOO = eris_ovOO = None

    # Antisymmetrize the same-spin doubles in both index pairs.
    u2aa *= .5
    u2bb *= .5
    u2aa = u2aa - u2aa.transpose(0, 1, 3, 2)
    u2aa = u2aa - u2aa.transpose(1, 0, 2, 3)
    u2bb = u2bb - u2bb.transpose(0, 1, 3, 2)
    u2bb = u2bb - u2bb.transpose(1, 0, 2, 3)

    # Divide by the orbital-energy denominators e_i - e_a (+ e_j - e_b).
    eia_a = lib.direct_sum('i-a->ia', mo_ea_o, mo_ea_v)
    eia_b = lib.direct_sum('i-a->ia', mo_eb_o, mo_eb_v)
    u1a /= eia_a
    u1b /= eia_b

    u2aa /= lib.direct_sum('ia+jb->ijab', eia_a, eia_a)
    u2ab /= lib.direct_sum('ia+jb->ijab', eia_a, eia_b)
    u2bb /= lib.direct_sum('ia+jb->ijab', eia_b, eia_b)

    time0 = log.timer_debug1('update t1 t2', *time0)
    t1new = u1a, u1b
    t2new = u2aa, u2ab, u2bb
    return t1new, t2new
def contract(myci, civec, eris):
    '''Build a new CI vector by contracting ``civec`` with the integrals.

    ``civec`` is split into ``(c0, c1, c2)`` by
    ``myci.cisdvec_to_amplitudes`` and the three output pieces
    ``(t0, t1, t2)`` are assembled from the Fock blocks and two-electron
    integrals in ``eris`` (presumably the Hamiltonian-vector product used
    by the CISD eigen-solver -- confirm against the caller).

    Args:
        myci: solver object; ``nocc``, ``nmo``, ``max_memory``, ``stdout``,
            ``verbose``, ``cisdvec_to_amplitudes`` and ``_add_vvvv`` are used.
        civec: flat CI vector.
        eris: integral container providing ``fock``, ``oooo``, ``oovv``,
            ``ovvo``, ``ovoo`` and ``get_ovvv``.

    Returns:
        1D array ``numpy.hstack((t0, t1.ravel(), t2.ravel()))``.
    '''
    # time.clock() was removed in Python 3.8; prefer process_time() and
    # only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    c0, c1, c2 = myci.cisdvec_to_amplitudes(civec, nmo, nocc)

    t2 = myci._add_vvvv(c2, eris, t2sym='jiba')
    t2 *= .5  # due to t2+t2.transpose(1,0,3,2) in the end
    log.timer_debug1('vvvv', *time0)

    foo = eris.fock[:nocc, :nocc].copy()
    fov = eris.fock[:nocc, nocc:].copy()
    fvv = eris.fock[nocc:, nocc:].copy()

    t1 = fov * c0
    t1 += numpy.einsum('ib,ab->ia', c1, fvv)
    t1 -= numpy.einsum('ja,ji->ia', c1, foo)

    t2 += lib.einsum('kilj,klab->ijab', _cp(eris.oooo) * .5, c2)
    t2 += lib.einsum('ijac,bc->ijab', c2, fvv)
    t2 -= lib.einsum('kj,kiba->jiba', foo, c2)
    t2 += numpy.einsum('ia,jb->ijab', c1, fov)

    # Process the virtual index in blocks sized from the available memory.
    unit = nocc * nvir**2 + nocc**2 * nvir * 3 + 1
    max_memory = max(0, myci.max_memory - lib.current_memory()[0])
    blksize = min(nvir, max(BLKMIN, int(max_memory * .9e6 / 8 / unit)))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d',
               max_memory, nocc, nvir, blksize)
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_oVoV = _cp(_cp(eris.oovv[:, :, p0:p1]).transpose(0, 2, 1, 3))
        tmp = lib.einsum('kbjc,ikca->jiba', eris_oVoV, c2)
        t2[:, :, p0:p1] -= tmp * .5
        t2[:, :, p0:p1] -= tmp.transpose(1, 0, 2, 3)
        tmp = None

        eris_ovvo = _cp(eris.ovvo[:, p0:p1])
        t2[:, :, p0:p1] += eris_ovvo.transpose(0, 3, 1, 2) * (c0 * .5)
        t1 += numpy.einsum('ia,iabj->jb', c1[:, p0:p1], eris_ovvo) * 2
        t1[:, p0:p1] -= numpy.einsum('ib,iajb->ja', c1, eris_oVoV)

        ovov = -.5 * eris_oVoV
        ovov += eris_ovvo.transpose(3, 1, 0, 2)
        # Both integral blocks are folded into ovov; release them now.
        eris_oVoV = eris_ovvo = None
        theta = c2[:, :, p0:p1].transpose(2, 0, 1, 3) * 2
        theta -= c2[:, :, p0:p1].transpose(2, 1, 0, 3)
        for j in range(nocc):
            t2[:, j] += lib.einsum('ckb,ckia->iab', ovov[j], theta)
        tmp = ovov = None

        t1 += numpy.einsum('aijb,ia->jb', theta, fov[:, p0:p1])

        eris_ovoo = _cp(eris.ovoo[:, p0:p1])
        t1 -= lib.einsum('bjka,jbki->ia', theta, eris_ovoo)
        t2[:, :, p0:p1] -= lib.einsum('jbik,ka->jiba', eris_ovoo.conj(), c1)
        # Release before the (larger) ovvv block is loaded.
        eris_ovoo = None

        eris_ovvv = eris.get_ovvv(slice(None), slice(p0, p1)).conj()
        t1 += lib.einsum('cjib,jcba->ia', theta, eris_ovvv)
        t2[:, :, p0:p1] += lib.einsum('iacb,jc->ijab', eris_ovvv, c1)
        tmp = eris_ovvv = None

    #:t2 + t2.transpose(1,0,3,2)
    for i in range(nocc):
        if i > 0:
            t2[i, :i] += t2[:i, i].transpose(0, 2, 1)
            t2[:i, i] = t2[i, :i].transpose(0, 2, 1)
        t2[i, i] = t2[i, i] + t2[i, i].T

    t0 = numpy.einsum('ia,ia->', fov, c1) * 2
    t0 += numpy.einsum('iabj,ijab->', eris.ovvo, c2) * 2
    t0 -= numpy.einsum('iabj,jiab->', eris.ovvo, c2)
    cinew = numpy.hstack((t0, t1.ravel(), t2.ravel()))
    return cinew
def _gamma2_outcore(myci, civec, nmo, nocc, h5fobj, compress_vvvv=False):
    '''Build two-particle density-matrix intermediates and store them in
    ``h5fobj``.

    Args:
        myci: solver object; ``nocc``, ``nmo``, ``max_memory``, ``stdout``,
            ``verbose`` and ``cisdvec_to_amplitudes`` are used.
        civec: flat CI vector, split into ``(c0, c1, c2)``.
        nmo, nocc: accepted for interface compatibility but immediately
            replaced by ``myci.nmo`` and ``myci.nocc``.
        h5fobj: writable HDF5-like group; the datasets 'dovov', 'doooo',
            'dooov', 'dovvv', 'dvvvv', 'doovv' and 'dovvo' are created in it.
        compress_vvvv: if true, 'dvvvv' is stored symmetrized and packed as
            an ``(nvir_pair, nvir_pair)`` array instead of the full
            ``(nvir, nvir, nvir, nvir)`` array.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``
        of datasets read back from ``h5fobj``; ``dvvov`` is always None.
    '''
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nvir_pair = nvir * (nvir + 1) // 2
    c0, c1, c2 = myci.cisdvec_to_amplitudes(civec, nmo, nocc)

    h5fobj['dovov'] = (2 * c0 * c2.conj().transpose(0, 2, 1, 3) -
                       c0 * c2.conj().transpose(1, 2, 0, 3))

    doooo = lib.einsum('ijab,klab->ijkl', c2.conj(), c2)
    h5fobj['doooo'] = (doooo.transpose(0, 2, 1, 3) -
                       doooo.transpose(1, 2, 0, 3) * .5)
    doooo = None

    dooov = -lib.einsum('ia,klac->klic', c1 * 2, c2.conj())
    h5fobj['dooov'] = (dooov.transpose(0, 2, 1, 3) * 2 -
                       dooov.transpose(1, 2, 0, 3))
    dooov = None

    #:dvovv = numpy.einsum('ia,ikcd->akcd', c1, c2) * 2
    #:dvvvv = lib.einsum('ijab,ijcd->abcd', c2, c2)
    max_memory = max(0, myci.max_memory - lib.current_memory()[0])
    unit = max(nocc**2 * nvir * 2 + nocc * nvir**2 * 3 + 1,
               nvir**3 * 2 + nocc * nvir**2 + 1)
    blksize = min(nvir, max(BLKMIN, int(max_memory * .9e6 / 8 / unit)))
    # The blocks partition the virtual index, so report nvir (not nocc).
    log.debug1('rdm intermediates: block size = %d, nvir = %d in %d blocks',
               blksize, nvir, int((nvir + blksize - 1) / blksize))
    dtype = numpy.result_type(civec).char
    dovvv = h5fobj.create_dataset('dovvv', (nocc, nvir, nvir, nvir), dtype,
                                  chunks=(nocc, min(nocc, nvir), 1, nvir))
    if compress_vvvv:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair, nvir_pair), dtype)
    else:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir, nvir, nvir, nvir), dtype)

    for p0, p1 in lib.prange(0, nvir, blksize):
        theta = c2[:, :, p0:p1] - c2[:, :, p0:p1].transpose(1, 0, 2, 3) * .5
        gvvvv = lib.einsum('ijab,ijcd->abcd', theta.conj(), c2)
        if compress_vvvv:
            # symmetrize dvvvv because it does not affect the results of cisd_grad
            # dvvvv = (dvvvv+dvvvv.transpose(0,1,3,2)) * .5
            # dvvvv = (dvvvv+dvvvv.transpose(1,0,2,3)) * .5
            # now dvvvv == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
            tmp = numpy.empty((nvir, nvir, nvir))
            tmpvvvv = numpy.empty((p1 - p0, nvir, nvir_pair))
            for i in range(p1 - p0):
                tmp[:] = gvvvv[i].conj().transpose(1, 0, 2)
                lib.pack_tril(tmp + tmp.transpose(0, 2, 1), out=tmpvvvv[i])
            # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
            for i in range(p0, p1):
                for j in range(p0, i):
                    tmpvvvv[i - p0, j] += tmpvvvv[j - p0, i]
                tmpvvvv[i - p0, i] *= 2
            # Rows i >= p1 only receive the partial contribution for now;
            # they are completed when their own block is processed.
            for i in range(p1, nvir):
                off = i * (i + 1) // 2
                dvvvv[off + p0:off + p1] = tmpvvvv[:, i]
            for i in range(p0, p1):
                off = i * (i + 1) // 2
                if p0 > 0:
                    # Add the partial contributions stored by earlier blocks.
                    tmpvvvv[i - p0, :p0] += dvvvv[off:off + p0]
                dvvvv[off:off + i + 1] = tmpvvvv[i - p0, :i + 1] * .25
            tmp = tmpvvvv = None
        else:
            for i in range(p0, p1):
                dvvvv[i] = gvvvv[i - p0].conj().transpose(1, 0, 2)

        gvovv = numpy.einsum('ia,ikcd->akcd', c1[:, p0:p1].conj() * 2, c2)
        gvovv = gvovv.conj()
        dovvv[:, :, p0:p1] = (gvovv.transpose(1, 3, 0, 2) * 2 -
                              gvovv.transpose(1, 2, 0, 3))

    theta = c2 * 2 - c2.transpose(1, 0, 2, 3)
    doovv = numpy.einsum('ia,kc->ikca', c1.conj(), -c1)
    doovv -= lib.einsum('kjcb,kica->jiab', c2.conj(), theta)
    doovv -= lib.einsum('ikcb,jkca->ijab', c2.conj(), theta)
    h5fobj['doovv'] = doovv
    doovv = None

    dovvo = lib.einsum('ikac,jkbc->iabj', theta.conj(), theta)
    dovvo += numpy.einsum('ia,kc->iack', c1.conj(), c1) * 2
    h5fobj['dovvo'] = dovvo
    theta = dovvo = None

    dvvov = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'], h5fobj['doovv'],
            h5fobj['dovvo'], dvvov, h5fobj['dovvv'], h5fobj['dooov'])
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    '''Transform spin-resolved 2-RDM intermediates from the MO to the AO
    basis, writing the result as two packed ``(nao_pair, nao_pair)`` datasets.

    Args:
        mycc: solver object; ``stdout``, ``verbose`` and ``max_memory`` are
            used.
        d2: sequence of eight 4-tuples of density intermediates, unpacked
            below in the order ovov, vvvv, oooo, oovv, ovvo, vvov, ovvv,
            ooov (lower/upper case presumably distinguishes the two spins).
        mo_coeff: pair of orbital coefficient matrices ``(mo_a, mo_b)``.
        fsave: optional HDF5-like file to write the datasets 'dm2aa+ab' and
            'dm2bb+ab' into. If None, a temporary file is used.

    Returns:
        If ``fsave`` was None, a tuple of two in-memory arrays read from
        'dm2aa+ab' and 'dm2bb+ab'; otherwise ``fsave`` itself.
    '''
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; prefer process_time() and
    # only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = cpu_clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dovOV, dOVov, dOVOV = d2[0]
    dvvvv, dvvVV, dVVvv, dVVVV = d2[1]
    doooo, dooOO, dOOoo, dOOOO = d2[2]
    doovv, dooVV, dOOvv, dOOVV = d2[3]
    dovvo, dovVO, dOVvo, dOVVO = d2[4]
    dvvov, dvvOV, dVVov, dVVOV = d2[5]
    dovvv, dovVV, dOVvv, dOVVV = d2[6]
    dooov, dooOV, dOOov, dOOOV = d2[7]
    mo_a = numpy.asarray(mo_coeff[0], order='F')
    mo_b = numpy.asarray(mo_coeff[1], order='F')

    nocca, nvira, noccb, nvirb = dovOV.shape
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nao_pair = nao * (nao + 1) // 2
    nvira_pair = nvira * (nvira + 1) // 2
    nvirb_pair = nvirb * (nvirb + 1) // 2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()

    def _trans(vin, mo_coeff, orbs_slice, out=None):
        # Transform the trailing two indices of every row of vin with
        # mo_coeff through the C driver; the output has nao_pair columns.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv(ftrans, fmm, out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int * 4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize_a = int(max_memory * .9e6 / 8 / (nao_pair + nmoa**2))
    blksize_a = min(nvira_pair, max(ccsd.BLKMIN, blksize_a))
    chunks_a = (int(min(nao_pair, 4e8 / blksize_a)), blksize_a)
    v_aa = fswap.create_dataset('v_aa', (nao_pair, nvira_pair), 'f8',
                                chunks=chunks_a)
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        v_aa[:, p0:p1] = _trans(lib.unpack_tril(dvvvv[p0:p1] * .25), mo_a,
                                (nocca, nmoa, nocca, nmoa)).T

    v_ba = fswap.create_dataset('v_ab', (nao_pair, nvira_pair), 'f8',
                                chunks=chunks_a)
    dvvOP = fswap.create_dataset('dvvOP', (nvira_pair, noccb, nmob), 'f8',
                                 chunks=(int(min(blksize_a, 4e8 / nmob)), 1, nmob))
    for i in range(noccb):
        buf1 = numpy.empty((nmob, nvira, nvira))
        buf1[:noccb] = dOOvv[i] * .5
        buf1[noccb:] = dOVvv[i]
        buf1 = buf1.transpose(1, 2, 0) + buf1.transpose(2, 1, 0)
        dvvOP[:, i] = buf1[numpy.tril_indices(nvira)]
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        buf1 = numpy.zeros((p1 - p0, nmob, nmob))
        buf1[:, noccb:, noccb:] = lib.unpack_tril(dvvVV[p0:p1] * .5)
        buf1[:, :noccb, :] = dvvOP[p0:p1] * .5
        v_ba[:, p0:p1] = _trans(buf1, mo_b, (0, nmob, 0, nmob)).T
    dvvOO = dvvOV = None

    blksize_b = int(max_memory * .9e6 / 8 / (nao_pair + nmob**2))
    blksize_b = min(nvirb_pair, max(ccsd.BLKMIN, blksize_b))
    chunks_b = (int(min(nao_pair, 4e8 / blksize_b)), blksize_b)
    v_bb = fswap.create_dataset('v_bb', (nao_pair, nvirb_pair), 'f8',
                                chunks=chunks_b)
    for p0, p1 in lib.prange(0, nvirb_pair, blksize_b):
        v_bb[:, p0:p1] = _trans(lib.unpack_tril(dVVVV[p0:p1] * .25), mo_b,
                                (noccb, nmob, noccb, nmob)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory * .9e6 / 8 / (nao_pair + nmoa**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    o_aa = fswap.create_dataset('o_aa', (nmoa, nocca, nao_pair), 'f8',
                                chunks=(nocca, nocca, blksize))
    o_ab = fswap.create_dataset('o_ab', (nmoa, nocca, nao_pair), 'f8',
                                chunks=(nocca, nocca, blksize))
    o_bb = fswap.create_dataset('o_bb', (nmob, noccb, nao_pair), 'f8',
                                chunks=(noccb, noccb, blksize))
    buf1 = numpy.zeros((nocca, nocca, nmoa, nmoa))
    buf1[:, :, :nocca, :nocca] = _cp(doooo) * .25
    buf1[:, :, nocca:, nocca:] = _cp(doovv) * .5
    buf1 = _trans(buf1.reshape(nocca**2, -1), mo_a, (0, nmoa, 0, nmoa))
    o_aa[:nocca] = buf1.reshape(nocca, nocca, nao_pair)

    buf1 = numpy.zeros((nocca, nocca, nmob, nmob))
    buf1[:, :, :noccb, :noccb] = _cp(dooOO) * .5
    buf1[:, :, :noccb, noccb:] = _cp(dooOV)
    buf1[:, :, noccb:, noccb:] = _cp(dooVV) * .5
    buf1 = _trans(buf1.reshape(nocca**2, -1), mo_b, (0, nmob, 0, nmob))
    o_ab[:nocca] = buf1.reshape(nocca, nocca, nao_pair)

    buf1 = numpy.zeros((noccb, noccb, nmob, nmob))
    buf1[:, :, :noccb, :noccb] = _cp(dOOOO) * .25
    buf1[:, :, noccb:, noccb:] = _cp(dOOVV) * .5
    buf1 = _trans(buf1.reshape(noccb**2, -1), mo_b, (0, nmob, 0, nmob))
    o_bb[:noccb] = buf1.reshape(noccb, noccb, nao_pair)

    dovoo = numpy.asarray(dooov).transpose(2, 3, 0, 1)
    dovOO = numpy.asarray(dOOov).transpose(2, 3, 0, 1)
    dOVOO = numpy.asarray(dOOOV).transpose(2, 3, 0, 1)
    for p0, p1 in lib.prange(nocca, nmoa, nocca):
        buf1 = numpy.zeros((nocca, p1 - p0, nmoa, nmoa))
        buf1[:, :, :nocca, :nocca] = dovoo[:, p0 - nocca:p1 - nocca]
        buf1[:, :, nocca:, :nocca] = dovvo[:, p0 - nocca:p1 - nocca] * .5
        buf1[:, :, :nocca, nocca:] = dovov[:, p0 - nocca:p1 - nocca] * .5
        buf1[:, :, nocca:, nocca:] = dovvv[:, p0 - nocca:p1 - nocca]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * nocca, -1)
        buf1 = _trans(buf1, mo_a, (0, nmoa, 0, nmoa))
        o_aa[p0:p1] = buf1.reshape(p1 - p0, nocca, nao_pair)

        buf1 = numpy.zeros((nocca, p1 - p0, nmob, nmob))
        buf1[:, :, :noccb, :noccb] = dovOO[:, p0 - nocca:p1 - nocca]
        buf1[:, :, noccb:, :noccb] = dovVO[:, p0 - nocca:p1 - nocca]
        buf1[:, :, :noccb, noccb:] = dovOV[:, p0 - nocca:p1 - nocca]
        buf1[:, :, noccb:, noccb:] = dovVV[:, p0 - nocca:p1 - nocca]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * nocca, -1)
        buf1 = _trans(buf1, mo_b, (0, nmob, 0, nmob))
        o_ab[p0:p1] = buf1.reshape(p1 - p0, nocca, nao_pair)

    for p0, p1 in lib.prange(noccb, nmob, noccb):
        buf1 = numpy.zeros((noccb, p1 - p0, nmob, nmob))
        buf1[:, :, :noccb, :noccb] = dOVOO[:, p0 - noccb:p1 - noccb]
        buf1[:, :, noccb:, :noccb] = dOVVO[:, p0 - noccb:p1 - noccb] * .5
        buf1[:, :, :noccb, noccb:] = dOVOV[:, p0 - noccb:p1 - noccb] * .5
        buf1[:, :, noccb:, noccb:] = dOVVV[:, p0 - noccb:p1 - noccb]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * noccb, -1)
        buf1 = _trans(buf1, mo_b, (0, nmob, 0, nmob))
        o_bb[p0:p1] = buf1.reshape(p1 - p0, noccb, nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    dm2a = fsave.create_dataset('dm2aa+ab', (nao_pair, nao_pair), 'f8',
                                chunks=(int(min(nao_pair, 4e8 / blksize)), blksize))
    dm2b = fsave.create_dataset('dm2bb+ab', (nao_pair, nao_pair), 'f8',
                                chunks=(int(min(nao_pair, 4e8 / blksize)), blksize))
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmoa, nmoa))
        buf1[:, nocca:, nocca:] = lib.unpack_tril(_cp(v_aa[p0:p1]))
        buf1[:, :, :nocca] = o_aa[:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, mo_a, (0, nmoa, 0, nmoa))
        if p0 > 0:
            # Merge with the transposed block written by earlier iterations.
            buf1 = _cp(dm2a[:p0, p0:p1])
            buf1[:p0, :p1 - p0] += buf2[:p1 - p0, :p0].T
            buf2[:p1 - p0, :p0] = buf1[:p0, :p1 - p0].T
            dm2a[:p0, p0:p1] = buf1
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        dm2a[p0:p1] = buf2
        buf1 = buf2 = None

    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmob, nmob))
        buf1[:, noccb:, noccb:] = lib.unpack_tril(_cp(v_bb[p0:p1]))
        buf1[:, :, :noccb] = o_bb[:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, mo_b, (0, nmob, 0, nmob))
        if p0 > 0:
            buf1 = _cp(dm2b[:p0, p0:p1])
            buf1[:p0, :p1 - p0] += buf2[:p1 - p0, :p0].T
            buf2[:p1 - p0, :p0] = buf1[:p0, :p1 - p0].T
            dm2b[:p0, p0:p1] = buf1
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        dm2b[p0:p1] = buf2
        buf1 = buf2 = None

    # The mixed-spin block is added to both outputs.
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmoa, nmoa))
        buf1[:, nocca:, nocca:] = lib.unpack_tril(_cp(v_ba[p0:p1]))
        buf1[:, :, :nocca] = o_ab[:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, mo_a, (0, nmoa, 0, nmoa))
        dm2a[:, p0:p1] = dm2a[:, p0:p1] + buf2.T
        dm2b[p0:p1] = dm2b[p0:p1] + buf2
        buf1 = buf2 = None

    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # Dataset.value was removed in h5py 3.0; [()] reads the whole
        # dataset on both old and new h5py.
        return (fsave['dm2aa+ab'][()], fsave['dm2bb+ab'][()])
    else:
        return fsave
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    '''Transform 2-RDM intermediates from the MO to the AO basis and store
    the result as a packed ``(nao_pair, nao_pair)`` dataset named 'dm2'.

    Args:
        mycc: solver object; ``stdout``, ``verbose`` and ``max_memory`` are
            used.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``
            of density intermediates.
        mo_coeff: orbital coefficient matrix of shape ``(nao, nmo)``.
        fsave: optional HDF5-like file to create the 'dm2' dataset in. If
            None, a temporary file is used.

    Returns:
        If ``fsave`` was None, the in-memory contents of 'dm2'; otherwise
        ``fsave`` itself.
    '''
    # Reference (in-core) implementation of what is computed below:
    # dm2 = ccsd_rdm._make_rdm2(mycc, None, d2, with_dm1=False)
    # dm2 = numpy.einsum('pi,ijkl->pjkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pj,ijkl->ipkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pk,ijkl->ijpl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pl,ijkl->ijkp', mo_coeff, dm2)
    # dm2 = dm2 + dm2.transpose(1,0,2,3)
    # dm2 = dm2 + dm2.transpose(0,1,3,2)
    # return ao2mo.restore(4, dm2*.5, nmo)
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; prefer process_time() and
    # only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time1 = cpu_clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = dovov.shape[:2]
    mo_coeff = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()

    def _trans(vin, orbs_slice, out=None):
        # Transform the trailing two indices of every row of vin with
        # mo_coeff through the C driver; the output has nao_pair columns.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv(ftrans, fmm, out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int * 4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nvir_pair, max(ccsd.BLKMIN, blksize))
    chunks_vv = (int(min(blksize, 4e8 / blksize)), blksize)
    fswap.create_dataset('v', (nao_pair, nvir_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        fswap['v'][:, p0:p1] = _trans(lib.unpack_tril(_cp(dvvvv[p0:p1])),
                                      (nocc, nmo, nocc, nmo)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    fswap.create_dataset('o', (nmo, nocc, nao_pair), 'f8',
                         chunks=(nocc, nocc, blksize))
    buf1 = numpy.zeros((nocc, nocc, nmo, nmo))
    buf1[:, :, :nocc, :nocc] = doooo
    buf1[:, :, nocc:, nocc:] = _cp(doovv)
    buf1 = _trans(buf1.reshape(nocc**2, -1), (0, nmo, 0, nmo))
    fswap['o'][:nocc] = buf1.reshape(nocc, nocc, nao_pair)
    dovoo = numpy.asarray(dooov).transpose(2, 3, 0, 1)
    for p0, p1 in lib.prange(nocc, nmo, nocc):
        buf1 = numpy.zeros((nocc, p1 - p0, nmo, nmo))
        buf1[:, :, :nocc, :nocc] = dovoo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, :nocc] = dovvo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, :nocc, nocc:] = dovov[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, nocc:] = dovvv[:, p0 - nocc:p1 - nocc]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * nocc, -1)
        buf1 = _trans(buf1, (0, nmo, 0, nmo))
        fswap['o'][p0:p1] = buf1.reshape(p1 - p0, nocc, nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    gsave = fsave.create_dataset('dm2', (nao_pair, nao_pair), 'f8',
                                 chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmo, nmo))
        buf1[:, nocc:, nocc:] = lib.unpack_tril(_cp(fswap['v'][p0:p1]))
        buf1[:, :, :nocc] = fswap['o'][:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, (0, nmo, 0, nmo))
        if p0 > 0:
            # Merge with the transposed block written by earlier iterations.
            buf1 = _cp(gsave[:p0, p0:p1])
            buf1[:p0, :p1 - p0] += buf2[:p1 - p0, :p0].T
            buf2[:p1 - p0, :p0] = buf1[:p0, :p1 - p0].T
            gsave[:p0, p0:p1] = buf1
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        gsave[p0:p1] = buf2
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # Dataset.value was removed in h5py 3.0; [()] reads the whole
        # dataset on both old and new h5py.
        return fsave['dm2'][()]
    else:
        return fsave
def casci(self, mo_coeff, ci0=None, eris=None, verbose=None, envs=None):
    '''Run the CASCI step for the given orbitals and log a summary.

    Args:
        mo_coeff: orbital coefficients passed to ``casci.kernel``.
        ci0: optional initial CI vector, passed to ``casci.kernel``.
        eris: optional integrals. When given, the solver object is built by
            ``_fake_h_for_fast_casci``; otherwise a shallow copy of ``self``
            is used with ``ao2mo`` bound to ``self.get_h2cas``.
        verbose: a ``logger.Logger`` instance or a verbosity level;
            ``self.verbose`` is used when it is None.
        envs: optional mapping of iteration variables. The summary is only
            logged when it is not None and the logger level is at least
            ``logger.INFO``.

    Returns:
        ``(e_tot, e_ci, fcivec)`` as produced by ``casci.kernel``, with
        ``e_ci`` reduced to its single element when it is not 0-dimensional.

    Raises:
        RuntimeError: if the solver returns more than one root.
    '''
    if eris is not None:
        fcasci = _fake_h_for_fast_casci(self, mo_coeff, eris)
    else:
        fcasci = copy.copy(self)
        fcasci.ao2mo = self.get_h2cas

    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        level = self.verbose if verbose is None else verbose
        log = logger.Logger(self.stdout, level)

    e_tot, e_ci, fcivec = casci.kernel(fcasci, mo_coeff, ci0, log)
    if numpy.size(e_ci) != 1:
        raise RuntimeError(
            'Multiple roots are detected in fcisolver. '
            'CASSCF does not know which state to optimize.\n'
            'See also mcscf.state_average or mcscf.state_specific for excited states.'
        )
    if numpy.ndim(e_ci) != 0:
        # This is a workaround for external CI solver compatibility.
        e_ci = e_ci[0]

    # Nothing to report without iteration info or below INFO verbosity.
    if envs is None or log.verbose < logger.INFO:
        return e_tot, e_ci, fcivec

    log.debug('CAS space CI energy = %.15g', e_ci)

    solver_has_ss = hasattr(self.fcisolver, 'spin_square')
    ss = (self.fcisolver.spin_square(fcivec, self.ncas, self.nelecas)
          if solver_has_ss else None)

    if 'imicro' not in envs:
        # Initialization step
        message = 'CASCI E = %.15g'
        values = (e_tot,)
    else:
        # Within CASSCF iteration
        message = ('macro iter %d (%d JK %d micro), '
                   'CASSCF E = %.15g dE = %.8g')
        values = (envs['imacro'], envs['njk'], envs['imicro'],
                  e_tot, e_tot - envs['elast'])
    if ss is not None:
        # Append the spin expectation value when the solver provides it.
        message += ' S^2 = %.7f'
        values += (ss[0],)
    log.info(message, *values)

    if 'imicro' in envs:
        if 'norm_gci' in envs:
            log.info(
                ' |grad[o]|=%5.3g '
                '|grad[c]|= %s |ddm|=%5.3g', envs['norm_gorb0'],
                envs['norm_gci'], envs['norm_ddm'])
        else:
            log.info(' |grad[o]|=%5.3g |ddm|=%5.3g', envs['norm_gorb0'],
                     envs['norm_ddm'])
    return e_tot, e_ci, fcivec
def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           mf_grad=None, verbose=logger.INFO):
    """Compute CCSD nuclear gradients for the atoms in ``atmlst``.

    Args:
        mycc: CCSD object; supplies ``t1``/``t2``/``l1``/``l2`` defaults,
            ``mo_coeff``, ``frozen``, ``mol``, ``_scf``, ``stdout``,
            ``verbose`` and ``max_memory``.
        t1, t2: cluster amplitudes (default: ``mycc.t1`` / ``mycc.t2``).
        l1, l2: lambda amplitudes (default: ``mycc.l1`` / ``mycc.l2``).
        eris: integral container (default: ``ccsd._ERIS(mycc)``).
        atmlst: atom indices to differentiate (default: all atoms).
        mf_grad: mean-field gradient object
            (default: ``rhf_grad.Gradients(mycc._scf)``).
        verbose: accepted for interface compatibility; the logger is built
            from ``mycc.verbose``.

    Returns:
        ``numpy.ndarray`` of shape ``(len(atmlst), 3)``.

    Raises:
        NotImplementedError: if ``mycc.frozen`` is a non-zero integer.
    """
    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if l1 is None: l1 = mycc.l1
    if l2 is None: l2 = mycc.l2
    if eris is None: eris = ccsd._ERIS(mycc)
    if mf_grad is None: mf_grad = rhf_grad.Gradients(mycc._scf)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    time0 = time.clock(), time.time()
    mol = mycc.mol
    # Builtin bool: the numpy.bool alias is missing in NumPy 1.24-1.26.
    moidx = numpy.ones(mycc.mo_coeff.shape[1], dtype=bool)
    if isinstance(mycc.frozen, (int, numpy.integer)):
        # frozen == 0 freezes nothing and is therefore supported.
        if mycc.frozen != 0:
            raise NotImplementedError('frozen orbital ccsd_grad')
    else:
        moidx[mycc.frozen] = False
    mo_coeff = mycc.mo_coeff[:, moidx]  #FIXME: ensure mycc.mo_coeff is canonical orbital
    mo_energy = eris.fock.diagonal()
    nocc, nvir = t1.shape
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    log.debug('Build ccsd rdm1 intermediates')
    d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer('rdm1 intermediates', *time0)

    log.debug('Build ccsd rdm2 intermediates')
    _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
    d2 = ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
    time1 = log.timer('rdm2 intermediates', *time1)
    log.debug('Build ccsd response_rdm1')
    Ioo, Ivv, Ivo, Xvo = IX_intermediates(mycc, t1, t2, l1, l2, eris, d1, d2)
    time1 = log.timer('response_rdm1 intermediates', *time1)

    dm1mo = response_dm1(mycc, t1, t2, l1, l2, eris, (Ioo, Ivv, Ivo, Xvo))
    dm1mo[:nocc, :nocc] = doo + doo.T
    dm1mo[nocc:, nocc:] = dvv + dvv.T
    dm1ao = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    im1 = numpy.zeros_like(dm1mo)
    im1[:nocc, :nocc] = Ioo
    im1[nocc:, nocc:] = Ivv
    im1[nocc:, :nocc] = Ivo
    im1[:nocc, nocc:] = Ivo.T
    im1 = reduce(numpy.dot, (mo_coeff, im1, mo_coeff.T))
    time1 = log.timer('response_rdm1', *time1)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Basically, 4 times of dm2 is computed. *2 in _rdm2_mo2ao, *2 in _load_block_tril
    fdm2 = h5py.File(_dm2file.name, 'w')
    dm1_with_hf = dm1mo.copy()
    for i in range(nocc):  # HF 2pdm ~ 4(ij)(kl)-2(il)(jk), diagonal+1 because of 4*dm2
        dm1_with_hf[i, i] += 1
    _rdm2_mo2ao(mycc, d2, dm1_with_hf, mo_coeff, fdm2)
    time1 = log.timer('MO->AO transformation', *time1)
    # Snapshot the keys first: entries are deleted while looping.
    for key in list(fd2intermediate.keys()):
        del (fd2intermediate[key])
    fd2intermediate.close()

    #TODO: pass hf_grad object to compute h1 and s1
    log.debug('h1 and JK1')
    h1 = mf_grad.get_hcore(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf4sij = reduce(numpy.dot,
                     (p1, mycc._scf.get_veff(mol, dm1ao + dm1ao.T), p1))
    time1 = log.timer('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    hf_dm1 = mycc._scf.make_rdm1(mycc._scf.mo_coeff, mycc._scf.mo_occ)
    dm1ao += hf_dm1
    zeta += mf_grad.make_rdm1e(mycc._scf.mo_energy, mycc._scf.mo_coeff,
                               mycc._scf.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(1, int(max_memory * 1e6 / 8 / (nao**3 * 2.5)))
    ioblksize = fdm2['dm2/0'].shape[-1]
    de = numpy.zeros((len(atmlst), 3))
    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] = (numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1]) +
                 numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1]))
        # h[1] \dot DM, *2 for +c.c., contribute to f1
        h1ao = mf_grad._grad_rinv(mol, ia)
        h1ao[:, p0:p1] += h1[:, p0:p1]
        de[k] += (numpy.einsum('xij,ij->x', h1ao, dm1ao) +
                  numpy.einsum('xji,ij->x', h1ao, dm1ao))
        # -s[1]*e \dot DM, contribute to f1
        de[k] -= (numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) +
                  numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1]))
        # -vhf[s_ij[1]], contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf4sij[p0:p1]) * 2

        # 2e AO integrals dot 2pdm
        ip0 = p0
        for b0, b1, nf in shell_prange(mol, shl0, shl1, blksize):
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=(b0, b1, 0, mol.nbas, 0, mol.nbas,
                                         0, mol.nbas))
            eri1 = eri1.reshape(3, nf, nao, -1)
            dm2buf = numpy.empty((nf, nao, nao_pair))
            for ic, (i0, i1) in enumerate(prange(0, nao_pair, ioblksize)):
                _load_block_tril(fdm2['dm2/%d' % ic], ip0, ip0 + nf,
                                 dm2buf[:, :, i0:i1])
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            eri1 = dm2buf = None
            ip0 += nf
        log.debug('grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer('grad of atom %d' % ia, *time1)

    log.note('CCSD gradients')
    log.note('==============')
    log.note(' x y z')
    for k, ia in enumerate(atmlst):
        log.note('%d %s %15.9f %15.9f %15.9f', ia, mol.atom_symbol(ia),
                 de[k, 0], de[k, 1], de[k, 2])
    log.timer('CCSD gradients', *time0)
    # Snapshot the keys first: entries are deleted while looping.
    for key in list(fdm2.keys()):
        del (fdm2[key])
    fdm2.close()
    _d2tmpfile = _dm2file = None
    return de
def _add_vvvv(mycc, t1, t2, eris, out=None, with_ovvv=False, t2sym=None):
    '''Contract the vvvv integrals with the (aa, ab, bb) doubles amplitudes.

    When ``t1`` is not None the amplitudes are first replaced by
    ``make_tau(t2, t1, t1)``.  With ``mycc.direct`` the amplitudes are
    transformed with the virtual MO coefficients, contracted through
    ``ccsd._contract_vvvv_t2`` and transformed back with ``_ao2mo.nr_e2``;
    otherwise the ``eris._contract_*_t2`` methods are used.

    Args:
        mycc: CC object (``direct``, ``mol``, ``mo_coeff``, ``stdout``,
            ``verbose``, ``get_frozen_mask`` are read).
        t1: singles amplitudes or None.
        t2: tuple ``(t2aa, t2ab, t2bb)``.
        eris: integral object.
        out: output buffer forwarded to ``ccsd._contract_vvvv_t2`` (direct
            path only).
        with_ovvv: must be false; the direct path raises
            NotImplementedError, the other path asserts.
        t2sym: must be None on the direct path; on the other path None
            selects the lower-triangular packed contraction.

    Returns:
        Tuple ``(u2aa, u2ab, u2bb)``.
    '''
    cput = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if t1 is None:
        t2aa, t2ab, t2bb = t2
    else:
        t2aa, t2ab, t2bb = make_tau(t2, t1, t1)
    nocca, nvira = t2aa.shape[1:3]
    noccb, nvirb = t2bb.shape[1:3]

    if not mycc.direct:
        # Integral-stored path: delegate to the eris contraction methods.
        assert (not with_ovvv)
        if t2sym is None:
            packed = eris._contract_vvvv_t2(mycc, t2aa[np.tril_indices(nocca)],
                                            mycc.direct, None)
            u2aa = ccsd._unpack_t2_tril(packed, nocca, nvira, None, 'jiba')
            packed = eris._contract_VVVV_t2(mycc, t2bb[np.tril_indices(noccb)],
                                            mycc.direct, None)
            u2bb = ccsd._unpack_t2_tril(packed, noccb, nvirb, None, 'jiba')
            u2ab = eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, None)
        else:
            u2aa = eris._contract_vvvv_t2(mycc, t2aa, mycc.direct, None)
            u2bb = eris._contract_VVVV_t2(mycc, t2bb, mycc.direct, None)
            u2ab = eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, None)
        return u2aa, u2ab, u2bb

    # Direct path.
    assert (t2sym is None)
    if with_ovvv:
        raise NotImplementedError

    eris_mo = getattr(eris, 'mo_coeff', None)
    if eris_mo is None:
        maska, maskb = mycc.get_frozen_mask()
        mo_a = mycc.mo_coeff[0][:, maska]
        mo_b = mycc.mo_coeff[1][:, maskb]
    else:
        mo_a, mo_b = eris_mo
    nao = mo_a.shape[0]
    vir_a = mo_a[:, nocca:]
    vir_b = mo_b[:, noccb:]
    tril_a = np.tril_indices(nocca, -1)
    tril_b = np.tril_indices(noccb, -1)
    npair_a = tril_a[0].size
    npair_b = tril_b[0].size

    def _same_spin_tau(t2ss, tril, vir, nocc):
        # Strictly-lower-triangular occupied pairs, both virtual indices
        # contracted with the virtual MO coefficients.
        if nocc <= 1:
            return np.zeros((0, nao, nao))
        half = lib.einsum('xab,pa->xpb', t2ss[tril], vir)
        return lib.einsum('xab,pb->xap', half, vir)

    blocks = [_same_spin_tau(t2aa, tril_a, vir_a, nocca),
              _same_spin_tau(t2bb, tril_b, vir_b, noccb)]
    mixed = lib.einsum('ijab,pa->ijpb', t2ab, vir_a)
    mixed = lib.einsum('ijab,pb->ijap', mixed, vir_b)
    blocks.append(mixed.reshape(nocca * noccb, nao, nao))
    tau = np.vstack(blocks)
    blocks = mixed = None  # release the intermediates
    cput = log.timer_debug1('vvvv-tau', *cput)

    buf = ccsd._contract_vvvv_t2(mycc, mycc.mol, None, tau, out, log)

    mo = np.asarray(np.hstack((vir_a, vir_b)), order='F')

    def _same_spin_u2(t2ss, tril, row0, row1, nocc, v0, v1):
        # Back-transform rows row0:row1 of buf and fill both triangles of
        # the occupied pair indices (transposed virtuals for the mirror).
        u2 = np.zeros_like(t2ss)
        if nocc > 1:
            nv = v1 - v0
            blk = _ao2mo.nr_e2(buf[row0:row1].reshape(-1, nao**2), mo.conj(),
                               (v0, v1, v0, v1), 's1', 's1')
            blk = blk.reshape(tril[0].size, nv, nv)
            u2[tril[1], tril[0]] = blk.transpose(0, 2, 1)
            u2[tril] = blk
        return u2

    u2aa = _same_spin_u2(t2aa, tril_a, None, npair_a, nocca, 0, nvira)
    u2bb = _same_spin_u2(t2bb, tril_b, npair_a, npair_a + npair_b, noccb,
                         nvira, nvira + nvirb)

    u2ab = _ao2mo.nr_e2(
        buf[-nocca * noccb:].reshape(nocca * noccb, nao**2), mo,
        (0, nvira, nvira, nvira + nvirb), 's1', 's1')
    u2ab = u2ab.reshape(t2ab.shape)
    return u2aa, u2ab, u2bb
def _rdm2_mo2ao(mycc, d2, dm1, mo_coeff, fsave=None):
    """Transform the 2-particle density-matrix intermediates from MO to AO.

    The work is done in three passes through a scratch HDF5 file:
    pass 1 transforms the blocks whose first index is occupied, pass 2 the
    ``dvvvv`` block, and pass 3 transforms the remaining index pair and
    symmetrises, writing datasets ``dm2/<n>`` into ``fsave``.

    Args:
        mycc: CC object (``stdout``, ``verbose`` and ``max_memory`` are read).
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        dm1: 1-particle matrix added to / subtracted from the diagonal
            blocks in pass 1.
        mo_coeff: MO coefficients, shape ``(nao, nmo)``.
        fsave: open, writable h5py file receiving group ``'dm2'``; if None
            a temporary file is used and the result is returned as an array.

    Returns:
        ``fsave`` when it was supplied by the caller, otherwise an ndarray
        loaded from the temporary file.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if fsave is None:
        _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fsave = h5py.File(_dm2file.name, 'w')
    else:
        _dm2file = None
    time1 = time.clock(), time.time()
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2
    nocc, nvir = dovov.shape[:2]
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2
    mo_coeff = numpy.asarray(mo_coeff, order='F')

    def _trans(vin, orbs_slice, out=None):
        # Thin ctypes wrapper around AO2MOnr_e2_drv; fills and returns
        # ``out`` with one row of length nao_pair per input row.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
        pao_loc = ctypes.POINTER(ctypes.c_void_p)()
        fdrv(_ccsd.libcc.AO2MOtranse2_nr_s1, _ccsd.libcc.CCmmm_transpose_sum,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int * 4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit write mode: h5py >= 3 opens read-only when no mode is given.
    fswap = h5py.File(_tmpfile.name, 'w')
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(
        1, int(max_memory * 1e6 / 8 / (nmo * nao_pair + nmo**3 + nvir**3)))
    iobuflen = int(256e6 / 8 / (blksize * nmo))
    log.debug1('_rdm2_mo2ao pass 1: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    fswap.create_group('o')  # for h5py old version
    pool1 = numpy.empty((blksize, nmo, nmo, nmo))
    pool2 = numpy.empty((blksize, nmo, nao_pair))
    bufd_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        buf1 = pool1[:p1 - p0]
        buf1[:, :nocc, :nocc, :nocc] = doooo[p0:p1]
        buf1[:, :nocc, :nocc, nocc:] = dooov[p0:p1]
        buf1[:, :nocc, nocc:, :nocc] = 0
        buf1[:, :nocc, nocc:, nocc:] = doovv[p0:p1]
        buf1[:, nocc:, :nocc, :nocc] = 0
        buf1[:, nocc:, :nocc, nocc:] = dovov[p0:p1]
        buf1[:, nocc:, nocc:, :nocc] = dovvo[p0:p1]
        d_ovvv = bufd_ovvv[:p1 - p0]
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        buf1[:, nocc:, nocc:, nocc:] = d_ovvv
        for i in range(p0, p1):
            buf1[i - p0, i, :, :] += dm1
            buf1[i - p0, :, :, i] -= dm1 * .5
        buf2 = pool2[:p1 - p0].reshape(-1, nao_pair)
        _trans(buf1.reshape(-1, nmo**2), (0, nmo, 0, nmo), buf2)
        ao2mo.outcore._transpose_to_h5g(fswap, 'o/%d' % istep, buf2, iobuflen)
    pool1 = pool2 = bufd_ovvv = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    fswap.create_group('v')  # for h5py old version
    pool1 = numpy.empty((blksize * nvir, nao_pair))
    pool2 = numpy.empty((blksize * nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nvir_pair, blksize * nvir)):
        buf1 = _cp(dvvvv[p0:p1])
        buf2 = lib.unpack_tril(buf1, out=pool2[:p1 - p0])
        buf1 = _trans(buf2, (nocc, nmo, nocc, nmo), out=pool1[:p1 - p0])
        ao2mo.outcore._transpose_to_h5g(fswap, 'v/%d' % istep, buf1, iobuflen)
    pool1 = pool2 = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(nao, int(max_memory * 1e6 / 8 / (nao_pair + nmo**2)))
    iobuflen = int(256e6 / 8 / blksize)
    log.debug1('_rdm2_mo2ao pass 3: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    gsave = fsave.create_group('dm2')
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        gsave.create_dataset(str(istep), (nao_pair, p1 - p0), 'f8')
    # Positions of the diagonal (i == j) entries in packed-triangular order.
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    pool1 = numpy.empty((blksize, nmo, nmo))
    pool2 = numpy.empty((blksize, nvir_pair))
    pool3 = numpy.empty((blksize, nvir, nvir))
    pool4 = numpy.empty((blksize, nao_pair))
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        buf1 = pool1[:p1 - p0]
        ao2mo.outcore._load_from_h5g(fswap['o'], p0, p1,
                                     buf1[:, :nocc].reshape(p1 - p0, -1))
        buf2 = ao2mo.outcore._load_from_h5g(fswap['v'], p0, p1,
                                            pool2[:p1 - p0])
        buf3 = lib.unpack_tril(buf2, out=pool3[:p1 - p0])
        buf1[:, nocc:, nocc:] = buf3
        buf1[:, nocc:, :nocc] = 0
        buf2 = _trans(buf1, (0, nmo, 0, nmo), out=pool4[:p1 - p0])
        idx = diagidx[diagidx < p1]
        if p0 > 0:
            # Fold the current block into the rows written by earlier steps.
            buf1 = _cp(gsave[str(istep)][:p0])
            for i0, i1 in prange(0, p1 - p0, BLKSIZE):
                for j0, j1 in prange(0, p0, BLKSIZE):
                    buf1[j0:j1, i0:i1] += buf2[i0:i1, j0:j1].T
                    buf2[i0:i1, j0:j1] = buf1[j0:j1, i0:i1].T
            buf1[:, idx[p0 <= idx] - p0] *= .5
            gsave[str(istep)][:p0] = buf1
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        buf2[:, idx] *= .5
        for ic, (i0, i1) in enumerate(prange(0, nao_pair, blksize)):
            gsave[str(ic)][p0:p1] = buf2[:, i0:i1]
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    del (fswap['o'])
    del (fswap['v'])
    fswap.close()
    _tmpfile = None
    time1 = log.timer_debug1('_rdm2_mo2ao cleanup', *time1)
    if _dm2file is not None:
        # NOTE(review): this buffer is sized with nvir_pair although the
        # 'dm2' datasets above are created with nao_pair rows -- confirm
        # this in-core path is intended before relying on it.
        nvir_pair = nvir * (nvir + 1) // 2
        dm2 = numpy.empty((nvir_pair, nvir_pair))
        ao2mo.outcore._load_from_h5g(fsave['dm2'], 0, nvir_pair, dm2)
        fsave.close()
        _dm2file = None
        return dm2
    else:
        return fsave
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    """Build the 3-index tensors 'j3c/<k>' stored in ``mydf._cderi``.

    Steps, as implemented below:
      1. ``outcore.aux_e2`` writes the raw 3-centre integrals for the fused
         auxiliary cell to ``mydf._cderi`` under 'j3c'.
      2. For every unique ``kptj - kpti`` the 2-centre metric is corrected
         with the Fourier-space contribution and Cholesky-factorised.
      3. ``make_kpt`` subtracts the Fourier-space contribution from each
         'j3c/<k>' block, applies ``fuse`` and solves with the Cholesky
         factor, writing the first ``naux`` rows back.
      4. Each dataset is finally truncated to its first ``naux`` rows.

    Args:
        mydf: density-fitting object (``stdout``, ``verbose``,
            ``max_memory``, ``gs``, ``_cderi``, ``weighted_coulG`` and
            ``auxbar`` are used).
        cell: the orbital-basis cell.
        auxcell: the auxiliary-basis cell.
        kptij_lst: array of k-point pairs; column 0 is kpti, column 1 kptj.

    Raises:
        scipy.linalg.LinAlgError: if a metric is not positive definite.
    """
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c',
                   max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        try:
            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
        except scipy.linalg.LinAlgError as e:
            msg = ('===================================\n'
                   'J-metric not positive definite.\n'
                   'It is likely that gs is not enough.\n'
                   '===================================')
            log.error(msg)
            # str(e): exceptions have no ``.message`` attribute on Python 3.
            raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:])
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    # Explicit read/write mode: h5py >= 3 opens read-only when no mode is
    # given, but the datasets written by aux_e2 are updated in place below.
    feri = h5py.File(mydf._cderi, 'r+')

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep + 1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                # No imaginary buffer is kept when both kpt and kptj
                # satisfy is_zero / gamma_point.
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and
                                gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1,
                                    j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1,
                                    j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True,
                                                  overwrite_b=True)
                feri['j3c/%d' % ji][:naux, col0:col1] = v

    try:
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)

        # Keep only the first naux rows of every dataset.
        for k, kptij in enumerate(kptij_lst):
            v = feri['j3c/%d' % k][:naux]
            del (feri['j3c/%d' % k])
            feri['j3c/%d' % k] = v
    finally:
        # Release the HDF5 handle even if a step above fails.
        feri.close()
def kernel(casscf, mo_coeff, tol=1e-7, conv_tol_grad=None, ci0=None,
           callback=None, verbose=None, dump_chk=True):
    """Two-step CASSCF driver: alternate orbital rotation and CASCI.

    Note: ``locals()`` is handed to ``casscf.casci``, ``casscf.dump_chk``,
    ``casscf.max_stepsize_scheduler`` and ``callback``; the local variable
    names in this function are therefore part of its contract and must not
    be renamed.

    Args:
        casscf: CASSCF object.
        mo_coeff: initial orbital coefficients.
        tol: energy convergence threshold.
        conv_tol_grad: orbital-gradient threshold
            (default: ``sqrt(tol)``).
        ci0: initial CI guess for the first CASCI.
        callback: called with ``locals()`` after each micro and macro
            iteration (default: ``casscf.callback``).
        verbose: verbosity level (default: ``casscf.verbose``).
        dump_chk: whether to call ``casscf.dump_chk``.

    Returns:
        Tuple ``(conv, e_tot, e_ci, fcivec, mo, mo_energy)``.
        ``mo_energy`` is None when ``casscf.canonicalization`` is false.
    """
    if verbose is None:
        verbose = casscf.verbose
    if callback is None:
        callback = casscf.callback

    log = logger.Logger(casscf.stdout, verbose)
    cput0 = (time.clock(), time.time())
    log.debug('Start 2-step CASSCF')

    mo = mo_coeff
    nmo = mo.shape[1]
    eris = casscf.ao2mo(mo)
    e_tot, e_ci, fcivec = casscf.casci(mo, ci0, eris, log, locals())
    if casscf.ncas == nmo and not casscf.internal_rotation:
        # Early exit: skip the orbital optimisation entirely.
        if casscf.canonicalization:
            log.debug('CASSCF canonicalization')
            mo, fcivec, mo_energy = casscf.canonicalize(mo, fcivec, eris,
                                                        False, casscf.natorb,
                                                        verbose=log)
        else:
            mo_energy = None
        # Same 6-tuple as the regular return at the end of the function.
        return True, e_tot, e_ci, fcivec, mo, mo_energy

    if conv_tol_grad is None:
        conv_tol_grad = numpy.sqrt(tol)
        logger.info(casscf, 'Set conv_tol_grad to %g', conv_tol_grad)
    conv_tol_ddm = conv_tol_grad * 3
    conv = False
    de, elast = e_tot, e_tot
    totmicro = totinner = 0
    casdm1 = 0
    r0 = None

    t2m = t1m = log.timer('Initializing 2-step CASSCF', *cput0)
    imacro = 0
    while not conv and imacro < casscf.max_cycle_macro:
        imacro += 1
        njk = 0
        t3m = t2m
        casdm1_old = casdm1
        casdm1, casdm2 = casscf.fcisolver.make_rdm12(fcivec, casscf.ncas,
                                                     casscf.nelecas)
        norm_ddm = numpy.linalg.norm(casdm1 - casdm1_old)
        t3m = log.timer('update CAS DM', *t3m)
        max_cycle_micro = 1  # casscf.micro_cycle_scheduler(locals())
        max_stepsize = casscf.max_stepsize_scheduler(locals())
        for imicro in range(max_cycle_micro):
            rota = casscf.rotate_orb_cc(mo, lambda:fcivec, lambda:casdm1,
                                        lambda:casdm2, eris, r0,
                                        conv_tol_grad*.3, max_stepsize, log)
            # Only the first step of the rotation generator is consumed.
            u, g_orb, njk1, r0 = next(rota)
            rota.close()
            njk += njk1
            norm_t = numpy.linalg.norm(u-numpy.eye(nmo))
            norm_gorb = numpy.linalg.norm(g_orb)
            if imicro == 0:
                norm_gorb0 = norm_gorb
            de = numpy.dot(casscf.pack_uniq_var(u), g_orb)
            t3m = log.timer('orbital rotation', *t3m)

            eris = None
            u = u.copy()
            g_orb = g_orb.copy()
            mo = casscf.rotate_mo(mo, u, log)
            eris = casscf.ao2mo(mo)
            t3m = log.timer('update eri', *t3m)

            log.debug('micro %d ~dE=%5.3g |u-1|=%5.3g |g[o]|=%5.3g |dm1|=%5.3g',
                      imicro, de, norm_t, norm_gorb, norm_ddm)

            if callable(callback):
                callback(locals())

            t2m = log.timer('micro iter %d'%imicro, *t2m)
            if norm_t < 1e-4 or abs(de) < tol*.4 or norm_gorb < conv_tol_grad*.2:
                break

        totinner += njk
        totmicro += imicro + 1

        e_tot, e_ci, fcivec = casscf.casci(mo, fcivec, eris, log, locals())
        log.timer('CASCI solver', *t3m)
        t2m = t1m = log.timer('macro iter %d'%imacro, *t1m)

        de, elast = e_tot - elast, e_tot
        if (abs(de) < tol and
            norm_gorb < conv_tol_grad and norm_ddm < conv_tol_ddm):
            conv = True
        else:
            elast = e_tot

        if dump_chk:
            casscf.dump_chk(locals())

        if callable(callback):
            callback(locals())

    if conv:
        log.info('2-step CASSCF converged in %d macro (%d JK %d micro) steps',
                 imacro, totinner, totmicro)
    else:
        log.info('2-step CASSCF not converged, %d macro (%d JK %d micro) steps',
                 imacro, totinner, totmicro)

    if casscf.canonicalization:
        log.info('CASSCF canonicalization')
        mo, fcivec, mo_energy = \
                casscf.canonicalize(mo, fcivec, eris, False, casscf.natorb,
                                    casdm1, log)
        if casscf.natorb:  # dump_chk may save casdm1
            nocc = casscf.ncore + casscf.ncas
            occ, ucas = casscf._eig(-casdm1, casscf.ncore, nocc)[0]
            casdm1 = numpy.diag(-occ)

    if dump_chk:
        casscf.dump_chk(locals())

    log.timer('2-step CASSCF', *cput0)
    if not casscf.canonicalization:
        # Without canonicalization mo_energy was never bound.  It is set
        # only here, after the last locals() snapshot, so the contents seen
        # by dump_chk/callback are unchanged.
        mo_energy = None
    return conv, e_tot, e_ci, fcivec, mo, mo_energy
def update_amps(cc, t1, t2, eris, max_memory=2000):
    """Perform one update of the k-point CCSD t1/t2 amplitudes.

    Args:
        cc: k-point CC object (``stdout``, ``verbose``, ``kpts`` and
            ``_scf.cell`` are read).
        t1: singles amplitudes, indexed ``[k, i, a]``.
        t2: doubles amplitudes, indexed ``[ki, kj, ka, i, j, a, b]``; the
            fourth k-point follows from ``kconserv``.
        eris: integral container providing ``fock``, ``oovv``, ``ovov``,
            ``ooov``, ``ovvv`` and ``ovoo``.
        max_memory: accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(t1new, t2new)``: the residuals divided by the
        orbital-energy denominators built from the Fock diagonal.
    """
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nkpts, nocc, nvir = t1.shape
    fock = eris.fock

    fov = fock[:, :nocc, nocc:].copy()
    foo = fock[:, :nocc, :nocc].copy()
    fvv = fock[:, nocc:, nocc:].copy()

    tau = imdk.make_tau(cc, t2, t1, t1)

    ### From eom-cc hackathon code ###
    Fvv = imdk.cc_Fvv(cc, t1, t2, eris)
    Foo = imdk.cc_Foo(cc, t1, t2, eris)
    Fov = imdk.cc_Fov(cc, t1, t2, eris)
    Woooo = imdk.cc_Woooo(cc, t1, t2, eris)
    Wvvvv = imdk.cc_Wvvvv(cc, t1, t2, eris)
    Wovvo = imdk.cc_Wovvo(cc, t1, t2, eris)

    # Move energy terms to the other side
    Fvv -= fvv
    Foo -= foo

    # Get the momentum conservation array
    # Note: chemist's notation for momentum conserving t2(ki,kj,ka,kb), even though
    # integrals are in physics notation
    kconserv = tools.get_kconserv(cc._scf.cell, cc.kpts)

    eris_ovvo = numpy.zeros(shape=(nkpts, nkpts, nkpts, nocc, nvir, nvir, nocc),
                            dtype=t2.dtype)
    eris_oovo = numpy.zeros(shape=(nkpts, nkpts, nkpts, nocc, nocc, nvir, nocc),
                            dtype=t2.dtype)
    eris_vvvo = numpy.zeros(shape=(nkpts, nkpts, nkpts, nvir, nvir, nvir, nocc),
                            dtype=t2.dtype)
    for km in range(nkpts):
        for kb in range(nkpts):
            for ke in range(nkpts):
                kj = kconserv[km, ke, kb]
                # <mb||je> -> -<mb||ej>
                eris_ovvo[km, kb, ke] = -eris.ovov[km, kb, kj].transpose(0, 1, 3, 2)
                # <mn||je> -> -<mn||ej>
                # let kb = kn as a dummy variable
                eris_oovo[km, kb, ke] = -eris.ooov[km, kb, kj].transpose(0, 1, 3, 2)
                # <ma||be> -> - <be||am>*
                # let kj = ka as a dummy variable
                kj = kconserv[km, ke, kb]
                eris_vvvo[ke, kj, kb] = -eris.ovvv[km, kb, ke].transpose(
                    2, 3, 1, 0).conj()

    # T1 equation
    t1new = numpy.zeros(shape=t1.shape, dtype=t1.dtype)
    for ka in range(nkpts):
        ki = ka
        # TODO: Does this fov need a conj()? Usually zero w/ canonical HF.
        t1new[ka] += fov[ka, :, :]
        t1new[ka] += einsum('ie,ae->ia', t1[ka], Fvv[ka])
        t1new[ka] += -einsum('ma,mi->ia', t1[ka], Foo[ka])
        for km in range(nkpts):
            t1new[ka] += einsum('imae,me->ia', t2[ka, km, ka], Fov[km])
            t1new[ka] += -einsum('nf,naif->ia', t1[km], eris.ovov[km, ka, ki])
            for kn in range(nkpts):
                ke = kconserv[km, ki, kn]
                t1new[ka] += -0.5 * einsum('imef,maef->ia', t2[ki, km, ke],
                                           eris.ovvv[km, ka, ke])
                t1new[ka] += -0.5 * einsum('mnae,nmei->ia', t2[km, kn, ka],
                                           eris_oovo[kn, km, ke])

    # T2 equation
    # For conj(), see Hirata and Bartlett, Eq. (36)
    t2new = eris.oovv.copy().conj()

    for ki in range(nkpts):
        for kj in range(nkpts):
            for ka in range(nkpts):
                # Chemist's notation for momentum conserving t2(ki,kj,ka,kb)
                kb = kconserv[ki, ka, kj]

                Ftmp = Fvv[kb] - 0.5 * einsum('mb,me->be', t1[kb], Fov[kb])
                tmp = einsum('ijae,be->ijab', t2[ki, kj, ka], Ftmp)
                t2new[ki, kj, ka] += tmp

                Ftmp = Fvv[ka] - 0.5 * einsum('ma,me->ae', t1[ka], Fov[ka])
                tmp = einsum('ijbe,ae->ijab', t2[ki, kj, kb], Ftmp)
                t2new[ki, kj, ka] -= tmp

                Ftmp = Foo[kj] + 0.5 * einsum('je,me->mj', t1[kj], Fov[kj])
                tmp = einsum('imab,mj->ijab', t2[ki, kj, ka], Ftmp)
                t2new[ki, kj, ka] -= tmp

                Ftmp = Foo[ki] + 0.5 * einsum('ie,me->mi', t1[ki], Fov[ki])
                tmp = einsum('jmab,mi->ijab', t2[kj, ki, ka], Ftmp)
                t2new[ki, kj, ka] += tmp

                for km in range(nkpts):
                    # Wminj
                    #   - km - kn + ka + kb = 0
                    # =>  kn = ka - km + kb
                    kn = kconserv[ka, km, kb]
                    t2new[ki, kj, ka] += 0.5 * einsum(
                        'mnab,mnij->ijab', tau[km, kn, ka], Woooo[km, kn, ki])
                    ke = km
                    t2new[ki, kj, ka] += 0.5 * einsum(
                        'ijef,abef->ijab', tau[ki, kj, ke], Wvvvv[ka, kb, ke])

                    # Wmbej
                    #     - km - kb + ke + kj = 0
                    #  => ke = km - kj + kb
                    ke = kconserv[km, kj, kb]
                    tmp = einsum('imae,mbej->ijab', t2[ki, km, ka],
                                 Wovvo[km, kb, ke])
                    #     - km - kb + ke + kj = 0
                    # =>  ke = km - kj + kb
                    #
                    # t[i,e] => ki = ke
                    # t[m,a] => km = ka
                    if km == ka and ke == ki:
                        tmp -= einsum('ie,ma,mbej->ijab', t1[ki], t1[km],
                                      eris_ovvo[km, kb, ke])
                    t2new[ki, kj, ka] += tmp
                    t2new[ki, kj, kb] -= tmp.transpose(0, 1, 3, 2)
                    t2new[kj, ki, ka] -= tmp.transpose(1, 0, 2, 3)
                    t2new[kj, ki, kb] += tmp.transpose(1, 0, 3, 2)

                ke = ki
                tmp = einsum('ie,abej->ijab', t1[ki], eris_vvvo[ka, kb, ke])
                t2new[ki, kj, ka] += tmp
                # P(ij) term
                ke = kj
                tmp = einsum('je,abei->ijab', t1[kj], eris_vvvo[ka, kb, ke])
                t2new[ki, kj, ka] -= tmp

                km = ka
                tmp = einsum('ma,mbij->ijab', t1[ka], eris.ovoo[km, kb, ki])
                t2new[ki, kj, ka] -= tmp
                # P(ab) term
                km = kb
                tmp = einsum('mb,maij->ijab', t1[kb], eris.ovoo[km, ka, ki])
                t2new[ki, kj, ka] += tmp

    # Orbital-energy denominators from the Fock diagonals, one row per
    # k-point.  Broadcasting replaces the former element-by-element loops;
    # the additions/subtractions keep the original left-to-right order.
    e_occ = numpy.diagonal(foo, axis1=1, axis2=2)
    e_vir = numpy.diagonal(fvv, axis1=1, axis2=2)

    eia = e_occ[:, :, None] - e_vir[:, None, :]
    t1new /= eia.astype(t1new.dtype, copy=False)

    for ki in range(nkpts):
        for kj in range(nkpts):
            for ka in range(nkpts):
                kb = kconserv[ki, ka, kj]
                eijab = (e_occ[ki][:, None, None, None]
                         + e_occ[kj][None, :, None, None]
                         - e_vir[ka][None, None, :, None]
                         - e_vir[kb][None, None, None, :])
                t2new[ki, kj, ka] /= eijab.astype(t2new.dtype, copy=False)

    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
def rotate_orb_cc(iah, u0, conv_tol_grad=None, verbose=logger.NOTE):
    '''Generator ("coroutine") producing successive orbital rotations.

    Each cycle runs the ``davidson_cc`` iterations, accumulates the accepted
    steps into ``dr`` and yields ``(u, g_kf, stat)`` where ``u`` is
    ``iah.extract_rotation(dr)``, ``g_kf`` the last keyframe gradient and
    ``stat`` a counter object (``imic``, ``tot_hop``, ``tot_kf``).  The
    caller must ``send`` the new reference ``u0`` back into the generator;
    gradient and Hessian operator are then rebuilt from it.

    Args:
        iah: object providing ``gen_g_hop``, ``get_grad``,
            ``extract_rotation`` and the ``ah_*``, ``kf_*``, ``max_iters``,
            ``max_stepsize`` and ``conv_tol_grad`` settings read below.
        u0: starting point passed to ``iah.gen_g_hop``.
        conv_tol_grad: gradient threshold (default: ``iah.conv_tol_grad``).
        verbose: a ``logger.Logger`` or a verbosity level.

    Note:
        ``g_op`` and ``precond`` are closures that look up ``g_orb`` and
        ``h_diag`` at call time, so rebinding those names inside the loop
        is intentional.
    '''
    t2m = (time.clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(sys.stdout, verbose)

    if conv_tol_grad is None:
        conv_tol_grad = iah.conv_tol_grad

    g_orb, h_op, h_diag = iah.gen_g_hop(u0)
    g_kf = g_orb
    norm_gkf = norm_gorb = numpy.linalg.norm(g_orb)
    log.debug(' |g|= %4.3g (keyframe)', norm_gorb)
    t3m = log.timer('gen h_op', *t2m)

    if h_diag is None:
        # No diagonal available: identity preconditioner.
        def precond(x, e):
            return x
    else:
        def precond(x, e):
            # Shifted diagonal; tiny entries are replaced by 1e-8 to avoid
            # division by (almost) zero.
            hdiagd = h_diag - (e - iah.ah_level_shift)
            hdiagd[abs(hdiagd) < 1e-8] = 1e-8
            x = x / hdiagd
            return x

    def scale_down_step(dxi, hdxi, norm_gorb):
        # Rescale the step (in place) so its largest element does not
        # exceed iah.max_stepsize; norm_gorb is not used.
        dxmax = abs(dxi).max()
        if dxmax > iah.max_stepsize:
            scale = iah.max_stepsize / dxmax
            log.debug1('Scale rotation by %g', scale)
            dxi *= scale
            hdxi *= scale
        return dxi, hdxi

    class Statistic:
        # Per-cycle counters handed back to the caller with every yield.
        def __init__(self):
            self.imic = 0
            self.tot_hop = 0
            self.tot_kf = 0

    kf_trust_region = iah.kf_trust_region
    g_op = lambda: g_orb
    x0_guess = g_orb
    while True:
        g_orb0 = g_orb
        stat = Statistic()
        dr = 0
        ikf = 0

        for ah_conv, ihop, w, dxi, hdxi, residual, seig \
                in davidson_cc(h_op, g_op, precond, x0_guess,
                               tol=iah.ah_conv_tol,
                               max_cycle=iah.ah_max_cycle,
                               lindep=iah.ah_lindep, verbose=log):
            stat.tot_hop = ihop
            norm_residual = numpy.linalg.norm(residual)
            # Accept the current step when the solver has converged, hit
            # its cycle limit, is past the start thresholds, or reports a
            # small seig.
            if (ah_conv or ihop == iah.ah_max_cycle or  # make sure to use the last step
                ((norm_residual < iah.ah_start_tol) and
                 (ihop >= iah.ah_start_cycle)) or
                (seig < iah.ah_lindep)):
                stat.imic += 1
                dxmax = abs(dxi).max()
                dxi, hdxi = scale_down_step(dxi, hdxi, norm_gorb)

                dr = dr + dxi
                g_orb = g_orb + hdxi
                norm_dr = numpy.linalg.norm(dr)
                norm_gorb = numpy.linalg.norm(g_orb)
                log.debug(
                    ' imic %d(%d) |g|= %4.3g |dxi|= %4.3g '
                    'max(|x|)= %4.3g |dr|= %4.3g eig= %4.3g seig= %4.3g',
                    stat.imic, ihop, norm_gorb, numpy.linalg.norm(dxi),
                    dxmax, norm_dr, w, seig)

                max_cycle = max(
                    iah.max_iters,
                    iah.max_iters - int(numpy.log(norm_gkf + 1e-9) * 2))
                log.debug1('Set max_cycle %d', max_cycle)
                ikf += 1
                if stat.imic > 3 and norm_gorb > norm_gkf * iah.ah_trust_region:
                    # Estimated gradient grew too much: undo the last step.
                    g_orb = g_orb - hdxi
                    dr = dr - dxi
                    norm_gorb = numpy.linalg.norm(g_orb)
                    log.debug('|g| >> keyframe, Restore previouse step')
                    break

                elif (stat.imic >= max_cycle or norm_gorb < conv_tol_grad * .2):
                    break

                elif (
                        ikf > 2 and  # avoid frequent keyframe
                    (
                        ikf >= max(iah.kf_interval, iah.kf_interval -
                                   numpy.log(norm_dr + 1e-9)) or
                        # Insert keyframe if the keyframe and the estimated g_orb are too different
                        norm_gorb < norm_gkf / kf_trust_region)):
                    ikf = 0
                    # Keyframe: recompute the gradient at the accumulated
                    # rotation and compare with the estimate.
                    g_kf1 = iah.get_grad(iah.extract_rotation(dr, u0))
                    stat.tot_kf += 1
                    norm_gkf1 = numpy.linalg.norm(g_kf1)
                    norm_dg = numpy.linalg.norm(g_kf1 - g_orb)
                    log.debug(
                        'Adjust keyframe g_orb to |g|= %4.3g '
                        '|g-correction|= %4.3g', norm_gkf1, norm_dg)

                    if (norm_dg < norm_gorb * iah.ah_trust_region  # kf not too diff
                            #or norm_gkf1 < norm_gkf # grad is decaying
                            # close to solution
                            or norm_gkf1 < conv_tol_grad * iah.ah_trust_region):
                        kf_trust_region = min(
                            max(norm_gorb / (norm_dg + 1e-9),
                                iah.kf_trust_region), 10)
                        log.debug1('Set kf_trust_region = %g', kf_trust_region)
                        g_orb = g_kf = g_kf1
                        norm_gorb = norm_gkf = norm_gkf1
                    else:
                        g_orb = g_orb - hdxi
                        dr = dr - dxi
                        norm_gorb = numpy.linalg.norm(g_orb)
                        log.debug(
                            'Out of trust region. Restore previouse step')
                        break

        u = iah.extract_rotation(dr)
        log.debug(' tot inner=%d |g|= %4.3g |u-1|= %4.3g', stat.imic,
                  norm_gorb, numpy.linalg.norm(numpy.tril(u, -1)))
        h_op = h_diag = None
        t3m = log.timer('aug_hess in %d inner iters' % stat.imic, *t3m)
        # Hand the rotation to the caller; it sends back the new u0.
        u0 = (yield u, g_kf, stat)

        g_kf, h_op, h_diag = iah.gen_g_hop(u0)
        norm_gkf = numpy.linalg.norm(g_kf)
        norm_dg = numpy.linalg.norm(g_kf - g_orb)
        log.debug(' |g|= %4.3g (keyframe), |g-correction|= %4.3g', norm_gkf,
                  norm_dg)
        kf_trust_region = min(
            max(norm_gorb / (norm_dg + 1e-9), iah.kf_trust_region), 10)
        log.debug1('Set kf_trust_region = %g', kf_trust_region)
        g_orb = g_kf
        norm_gorb = norm_gkf
        x0_guess = dxi
def __init__(self, cc, mo_coeff=None, method='incore'):
    '''Transform k-point ERIs to antisymmetrized spin-orbital blocks.

    Sets ``self.mo_coeff`` and ``self.fock``; on the in-core path it also
    sets ``self.dtype`` and the blocks ``oooo``, ``ooov``, ``ovoo``,
    ``oovv``, ``ovov``, ``ovvv`` and ``vvvv`` (each divided by ``nkpts``).

    NOTE(review): this block is Python 2 code (print statements, and
    ``nmo / 2`` used as an array dimension / slice bound).
    NOTE(review): there is no branch for the case where the in-core
    condition below is false; no integral attributes are set then.
    '''
    cput0 = (time.clock(), time.time())
    # Mask of orbitals that are kept (True) for every k-point.
    moidx = numpy.ones(shape=cc.mo_energy.shape, dtype=numpy.bool)
    nkpts = cc.nkpts
    nmo = cc.nmo()
    # TODO change this for k-points ... seems like it should work
    if isinstance(cc.frozen, (int, numpy.integer)):
        # An integer freezes the first cc.frozen orbitals at each k-point.
        for k in range(nkpts):
            moidx[k, :cc.frozen] = False
    elif len(cc.frozen) > 0:
        # Otherwise cc.frozen is used as a list of orbital indices.
        for k in range(nkpts):
            moidx[k, numpy.asarray(cc.frozen)] = False
    if mo_coeff is None:
        # TODO make this work for frozen maybe... seems like it should work
        self.mo_coeff = numpy.zeros((nkpts, nmo, nmo), dtype=numpy.complex128)
        for k in range(nkpts):
            self.mo_coeff[k] = cc.mo_coeff[k][:, moidx[k]]
        mo_coeff = self.mo_coeff
        # Diagonal Fock matrix built from the orbital energies.
        self.fock = numpy.zeros((nkpts, nmo, nmo))
        for k in range(nkpts):
            self.fock[k] = numpy.diag(cc.mo_energy[k][moidx[k]])
    else:
        # If mo_coeff is not canonical orbital
        # TODO does this work for k-points? changed to conjugate.
        self.mo_coeff = mo_coeff = mo_coeff[:, moidx]
        dm = cc._scf.make_rdm1(cc.mo_coeff, cc.mo_occ)
        fockao = cc._scf.get_hcore() + cc._scf.get_veff(cc.mol, dm)
        self.fock = reduce(numpy.dot, (numpy.conj(mo_coeff.T), fockao, mo_coeff))

    nocc = cc.nocc()
    nmo = cc.nmo()
    nvir = nmo - nocc
    mem_incore, mem_outcore, mem_basic = pyscf.cc.ccsd._mem_usage(
        nocc, nvir)
    mem_now = pyscf.lib.current_memory()[0]

    # Convert to spin-orbitals and anti-symmetrize
    so_coeff = numpy.zeros((nkpts, nmo / 2, nmo), dtype=numpy.complex128)
    # Even and odd columns receive the same coefficients.
    so_coeff[:, :, ::2] = so_coeff[:, :, 1::2] = mo_coeff[:, :nmo / 2, ::2]

    log = logger.Logger(cc.stdout, cc.verbose)
    # NOTE(review): `and` binds tighter than `or`, so incore_anyway alone is
    # enough to enter this branch.
    if (method == 'incore' and cc._scf._eri is None and
        (mem_incore + mem_now < cc.max_memory) or cc.mol.incore_anyway):
        kconserv = tools.get_kconserv(cc._scf.cell, cc.kpts)

        eri = numpy.zeros((nkpts, nkpts, nkpts, nmo, nmo, nmo, nmo), dtype=numpy.complex128)
        for kp in range(nkpts):
            for kq in range(nkpts):
                for kr in range(nkpts):
                    # Fourth k-point index is looked up from kconserv.
                    ks = kconserv[kp, kq, kr]
                    eri_kpt = pyscf.pbc.ao2mo.general(
                        cc._scf.cell,
                        (so_coeff[kp, :, :], so_coeff[kq, :, :], so_coeff[kr, :, :], so_coeff[ks, :, :]),
                        (cc.kpts[kp], cc.kpts[kq], cc.kpts[kr], cc.kpts[ks]))
                    eri_kpt = eri_kpt.reshape(nmo, nmo, nmo, nmo)
                    eri[kp, kq, kr] = eri_kpt.copy()

        # Zero the elements whose paired orbital indices have different
        # parity (even/odd index is presumably the spin label -- confirm).
        eri[:, :, :, ::2, 1::2] = eri[:, :, :, 1::2, ::2] = eri[:, :, :, :, :, ::2, 1::2] = eri[:, :, :, :, :, 1::2, ::2] = 0.

        # Checking some things...
        # Element-by-element check of (pq|rs) against (rs|pq), done in pure
        # Python loops over all k-point triples and orbital quadruples.
        maxdiff = 0.0
        for kp in range(nkpts):
            for kq in range(nkpts):
                for kr in range(nkpts):
                    ks = kconserv[kp, kq, kr]
                    for p in range(nmo):
                        for q in range(nmo):
                            for r in range(nmo):
                                for s in range(nmo):
                                    pqrs = eri[kp, kq, kr, p, q, r, s]
                                    rspq = eri[kr, ks, kp, r, s, p, q]
                                    diff = numpy.linalg.norm(pqrs - rspq).real
                                    if diff > 1e-5:
                                        print "** Warning: ERI diff at ",
                                        print "kp,kq,kr,ks,p,q,r,s =", kp, kq, kr, ks, p, q, r, s
                                    maxdiff = max(maxdiff, diff)
        print "Max difference in (pq|rs) - (rs|pq) = %.15g" % maxdiff
        #print "ERI ="
        #print eri

        # Antisymmetrizing (pq|rs)-(ps|rq), where the latter integral is equal to
        # (rq|ps); done since we aren't tracking the kpoint of orbital 's'
        eri1 = eri - eri.transpose(2, 1, 0, 5, 4, 3, 6)
        # Chemist -> physics notation
        eri1 = eri1.transpose(0, 2, 1, 3, 5, 4, 6)

        self.dtype = eri1.dtype
        # Occupied (:nocc) / virtual (nocc:) sub-blocks, scaled by 1/nkpts.
        self.oooo = eri1[:, :, :, :nocc, :nocc, :nocc, :nocc].copy() / nkpts
        self.ooov = eri1[:, :, :, :nocc, :nocc, :nocc, nocc:].copy() / nkpts
        self.ovoo = eri1[:, :, :, :nocc, nocc:, :nocc, :nocc].copy() / nkpts
        self.oovv = eri1[:, :, :, :nocc, :nocc, nocc:, nocc:].copy() / nkpts
        self.ovov = eri1[:, :, :, :nocc, nocc:, :nocc, nocc:].copy() / nkpts
        self.ovvv = eri1[:, :, :, :nocc, nocc:, nocc:, nocc:].copy() / nkpts
        self.vvvv = eri1[:, :, :, nocc:, nocc:, nocc:, nocc:].copy() / nkpts
        # Disabled packed-storage variant kept for reference:
        #ovvv = eri1[:nocc,nocc:,nocc:,nocc:].copy()
        #self.ovvv = numpy.empty((nocc,nvir,nvir*(nvir+1)//2))
        #for i in range(nocc):
        #    for j in range(nvir):
        #        self.ovvv[i,j] = lib.pack_tril(ovvv[i,j])
        #self.vvvv = pyscf.ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)

    log.timer('CCSD integral transformation', *cput0)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1, 3)), kpts_band=None, exxdiv=None):
    '''Build exchange (K) matrices for a set of k-points.

    The contraction is done block-wise over the plane-wave data produced by
    ``mydf.ft_loop``; real and imaginary parts are accumulated separately in
    ``vkR`` / ``vkI``.

    Args:
        mydf : density-fitting object; ``cell``, ``stdout``, ``verbose``,
            ``max_memory``, ``gs``, ``weighted_coulG`` and ``ft_loop`` are
            used.  ``mydf.exxdiv`` is overwritten with ``exxdiv`` as a side
            effect.
        dm_kpts : density matrices, normalised through ``_format_dms``.
        hermi : accepted but not referenced in this function.
        kpts : k-points of the density matrices.
        kpts_band : optional band k-points; when None the symmetric
            ``swap_2e`` shortcut is enabled.
        exxdiv : exchange-divergence treatment; only ``'ewald'`` is accepted
            when ``cell.dimension != 3``.

    Returns:
        Whatever ``_format_jks(vk_kpts, dm_kpts, input_band, kpts)`` returns.
    '''
    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    swap_2e = (kpts_band is None)
    # Keep the caller's original kpts_band for the final formatting step.
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    # kk_table[i, j] = kpts_band[i] - kpts[j]; kk_todo marks the pairs that
    # have not been processed yet.
    kk_table = kpts_band.reshape(-1, 1, 3) - kpts.reshape(1, -1, 3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset, nband, nao, nao))
    vkI = numpy.zeros((nset, nband, nao, nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx, kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            # The transposed pairs are handled by "case 2" below.
            kk_todo[kptj_idx, kpti_idx] = False

        max_memory1 = max_memory * (nkptj + 1) / (nkptj + 5)
        #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
        #bufR = numpy.empty((blksize*nao**2))
        #bufI = numpy.empty((blksize*nao**2))
        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        #buf1R = numpy.empty((blksize*nao**2))
        #buf1I = numpy.empty((blksize*nao**2))
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
            # The square root of the kernel is folded into pLq, which enters
            # each contraction twice.
            coulG = numpy.sqrt(vkcoulG[p0:p1])
            nG = p1 - p0
            # Scratch buffers reused for every k-point pair of this block.
            bufR = numpy.empty((nG * nao**2))
            bufI = numpy.empty((nG * nao**2))
            buf1R = numpy.empty((nG * nao**2))
            buf1I = numpy.empty((nG * nao**2))

            for k, aoao in enumerate(aoaoks):
                ki = kpti_idx[k]
                kj = kptj_idx[k]

                # case 1: k_pq = (pi|iq)
                #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
                #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
                pLqR = numpy.ndarray((nao, nG, nao), buffer=bufR)
                pLqI = numpy.ndarray((nao, nG, nao), buffer=bufI)
                pLqR[:] = aoao.real.reshape(nG, nao, nao).transpose(1, 0, 2)
                pLqI[:] = aoao.imag.reshape(nG, nao, nao).transpose(1, 0, 2)
                pLqR *= coulG.reshape(1, nG, 1)
                pLqI *= coulG.reshape(1, nG, 1)
                iLkR = numpy.ndarray((nao * nG, nao), buffer=buf1R)
                iLkI = numpy.ndarray((nao * nG, nao), buffer=buf1I)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(pLqR.reshape(-1, nao), pLqI.reshape(-1, nao),
                                        dmsR[i, kj], dmsI[i, kj], 1, iLkR, iLkI)
                    # Accumulates into vkR[i, ki] / vkI[i, ki].
                    zdotNC(iLkR.reshape(nao, -1), iLkI.reshape(nao, -1),
                           pLqR.reshape(nao, -1).T, pLqI.reshape(nao, -1).T,
                           1, vkR[i, ki], vkI[i, ki], 1)

                # case 2: k_pq = (iq|pi)
                #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
                #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
                if swap_2e and not is_zero(kpt):
                    iLkR = iLkR.reshape(nao, -1)
                    iLkI = iLkI.reshape(nao, -1)
                    for i in range(nset):
                        iLkR, iLkI = zdotNN(dmsR[i, ki], dmsI[i, ki],
                                            pLqR.reshape(nao, -1), pLqI.reshape(nao, -1),
                                            1, iLkR, iLkI)
                        # Accumulates into vkR[i, kj] / vkI[i, kj].
                        zdotCN(
                            pLqR.reshape(-1, nao).T, pLqI.reshape(-1, nao).T,
                            iLkR.reshape(-1, nao), iLkI.reshape(-1, nao),
                            1, vkR[i, kj], vkI[i, kj], 1)

    # Visit every (band k-point, k-point) pair exactly once; make_kpt clears
    # all pairs that share the same momentum difference.
    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki, kj]:
                make_kpt(kptj - kpti)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
        not numpy.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j
    vk_kpts *= 1. / nkpts

    # G=0 was not included in the non-uniform grids
    if cell.dimension != 3 and exxdiv:
        assert (exxdiv.lower() == 'ewald')
        # NOTE(review): kpts_band is passed both as the second and as the last
        # argument -- confirm against the helper's signature whether the
        # second one should be `kpts`.
        _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def general(eri_ao, mo_coeffs, verbose=0, compact=True, **kwargs):
    r'''For the given four sets of orbitals, transfer the 8-fold or 4-fold 2e
    AO integrals to MO integrals.

    Args:
        eri_ao : ndarray
            AO integrals, can be either 8-fold or 4-fold symmetry.
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)

    Kwargs:
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        2D array of transformed MO integrals.  The MO integrals may or may not
        have the permutation symmetry, depending on the given orbitals, and
        the kwargs compact.  If the four sets of orbitals are identical, the
        MO integrals will at most have 4-fold symmetry.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf.scf import _vhf
    >>> from pyscf import ao2mo
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> eri = _vhf.int2e_sph(mol._atm, mol._bas, mol._env)
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo4))
    >>> print(eri1.shape)
    (80, 24)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo3))
    >>> print(eri1.shape)
    (80, 21)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo3), compact=False)
    >>> print(eri1.shape)
    (80, 36)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo1,mo2,mo2))
    >>> print(eri1.shape)
    (55, 36)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo1,mo2))
    >>> print(eri1.shape)
    (80, 80)
    '''
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(sys.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    nao_pair = nao*(nao+1)//2
    assert(eri_ao.size in (nao_pair**2, nao_pair*(nao_pair+1)//2))

    # transform e1
    eri1 = half_e1(eri_ao, mo_coeffs, compact)
    klmosym, nkl_pair, mokl, klshape = _conc_mos(mo_coeffs[2], mo_coeffs[3],
                                                 compact)

    # The number of ij pairs is read off the half-transformed array (its
    # leading axis); the previous code referenced an undefined `nij_pair`
    # here and raised NameError instead of returning the empty result.
    nij_pair = eri1.shape[0]
    if nij_pair == 0 or nkl_pair == 0:
        # 0 dimension sometimes causes blas problem
        return numpy.zeros((nij_pair, nkl_pair))

#    if nij_pair > nkl_pair:
#        log.warn('low efficiency for AO to MO trans!')

    # transform e2
    eri1 = _ao2mo.nr_e2(eri1, mokl, klshape, aosym='s4', mosym=klmosym)
    return eri1
def _make_eris_outcore(mycc, mo_coeff=None):
    '''Transform the ERIs out-of-core and store them block-wise in HDF5.

    A ``_ChemistsERIs`` object is initialised from ``mycc`` / ``mo_coeff``;
    the datasets ``oooo``, ``ovoo``, ``ovov``, ``ovvo``, ``ovvv``, ``oovv``
    and ``vvvv`` are created in the temporary file ``eris.feri1`` and filled
    from the packed MO integrals produced by ``ao2mo.full``.

    Returns:
        The populated ``_ChemistsERIs`` instance.
    '''
    # time.clock() was removed in Python 3.8; use process_time when available
    # and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    cput0 = (cpu_clock(), time.time())
    log = logger.Logger(mycc.stdout, mycc.verbose)
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    mol = mycc.mol
    mo_coeff = eris.mo_coeff
    nocc = eris.nocc
    _, nmo = mo_coeff.shape
    nvir = nmo - nocc

    # Datasets are created in the same order as before; the chunk shapes keep
    # one virtual index per chunk for the blocks written slice by slice.
    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc, nocc, nocc, nocc),
                                          'f8')
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc, nvir, nocc, nocc),
                                          'f8', chunks=(nocc, 1, nocc, nocc))
    eris.ovov = eris.feri1.create_dataset('ovov', (nocc, nvir, nocc, nvir),
                                          'f8', chunks=(nocc, 1, nocc, nvir))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc, nvir, nvir, nocc),
                                          'f8', chunks=(nocc, 1, nvir, nocc))
    eris.ovvv = eris.feri1.create_dataset('ovvv', (nocc, nvir, nvir, nvir),
                                          'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc, nocc, nvir, nvir),
                                          'f8', chunks=(nocc, nocc, 1, nvir))
    eris.vvvv = eris.feri1.create_dataset('vvvv', (nvir, nvir, nvir, nvir),
                                          'f8')
    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])

    ftmp = lib.H5TmpFile()
    ao2mo.full(mol, mo_coeff, ftmp, max_memory=max_memory, verbose=log)
    eri = ftmp['eri_mo']

    nocc_pair = nocc * (nocc + 1) // 2
    # Map from square (p, q) indices to packed lower-triangular pair indices;
    # computed once and reused for every block below.
    tril2sq = lib.square_mat_in_trilu_indices(nmo)
    oo = eri[:nocc_pair]
    eris.oooo[:] = ao2mo.restore(1, oo[:, :nocc_pair], nocc)
    oovv = lib.take_2d(oo, tril2sq[:nocc, :nocc].ravel(),
                       tril2sq[nocc:, nocc:].ravel())
    eris.oovv[:] = oovv.reshape(nocc, nocc, nvir, nvir)
    oo = oovv = None

    blksize = min(nvir, max(BLKMIN, int(max_memory * 1e6 / 8 / nmo**3 / 2)))
    for p0, p1 in lib.prange(0, nvir, blksize):
        # q0:q1 is the same virtual range expressed in full MO indices.
        q0, q1 = p0 + nocc, p1 + nocc
        off0 = q0 * (q0 + 1) // 2
        off1 = q1 * (q1 + 1) // 2
        buf = lib.unpack_tril(eri[off0:off1])

        # Rows pairing the current virtuals with occupied orbitals.
        tmp = buf[tril2sq[q0:q1, :nocc] - off0]
        eris.ovoo[:, p0:p1] = tmp[:, :, :nocc, :nocc].transpose(1, 0, 2, 3)
        eris.ovvo[:, p0:p1] = tmp[:, :, nocc:, :nocc].transpose(1, 0, 2, 3)
        eris.ovov[:, p0:p1] = tmp[:, :, :nocc, nocc:].transpose(1, 0, 2, 3)
        eris.ovvv[:, p0:p1] = tmp[:, :, nocc:, nocc:].transpose(1, 0, 2, 3)

        # Rows pairing the current virtuals with virtuals up to q1; the
        # transposed part fills the block above the diagonal.
        tmp = buf[tril2sq[q0:q1, nocc:q1] - off0]
        eris.vvvv[p0:p1, :p1] = tmp[:, :, nocc:, nocc:]
        if p0 > 0:
            eris.vvvv[:p0, p0:p1] = tmp[:, :p0, nocc:, nocc:].transpose(
                1, 0, 2, 3)
        buf = tmp = None
    log.timer('CCSD integral transformation', *cput0)
    return eris
def kernel(self):
    '''Compute the SC-NEVPT2 correlation energy.

    The energy is the sum of eight subspace contributions (Sr, Si, Sijrs,
    Sijr, Srsi, Srs, Sij, Sir).  With ``self.compressed_mps`` the Sr and Si
    terms are read from the HDF5 file produced by ``DMRG_COMPRESS_NEVPT``;
    otherwise they are evaluated by ``Sr`` / ``Si``.

    Returns:
        The NEVPT2 energy, which is also stored in ``self.e_corr``.

    Raises:
        RuntimeError: if ``self.fcisolver`` is a state-average FCI solver.
        NotImplementedError: if ``self._mc.frozen`` is set.
    '''
    from pyscf.mcscf.addons import StateAverageFCISolver
    if isinstance(self.fcisolver, StateAverageFCISolver):
        raise RuntimeError(
            'State-average FCI solver object cannot be used '
            'in NEVPT2 calculation.\nA separated multi-root '
            'CASCI calculation is required for NEVPT2 method. '
            'See examples/mrpt/41-for_state_average.py.')

    if getattr(self._mc, 'frozen', None) is not None:
        raise NotImplementedError

    if isinstance(self.verbose, logger.Logger):
        log = self.verbose
    else:
        log = logger.Logger(self.stdout, self.verbose)
    time0 = (logger.process_clock(), logger.perf_counter())
    ncore = self.ncore
    ncas = self.ncas
    nocc = ncore + ncas

    # By default, _mc is canonicalized for the first root.
    # For SC-NEVPT based on compressed MPS perturber functions, the _mc was
    # already canonicalized.
    if not self.canonicalized:
        self.mo_coeff, _, self.mo_energy = self.canonicalize(
            self.mo_coeff, ci=self.load_ci(), verbose=self.verbose)

    if getattr(self.fcisolver, 'nevpt_intermediate', None):
        logger.info(self, 'DMRG-NEVPT')
        dm1, dm2, dm3 = self.fcisolver._make_dm123(self.load_ci(), ncas,
                                                   self.nelecas, None)
    else:
        dm1, dm2, dm3 = fci.rdm.make_dm123('FCI3pdm_kern_sf', self.load_ci(),
                                           self.load_ci(), ncas, self.nelecas)
    # The 4-particle density matrix is never built explicitly.
    dm4 = None

    dms = {
        '1': dm1,
        '2': dm2,
        '3': dm3,
        '4': dm4,
    }
    time1 = log.timer('3pdm, 4pdm', *time0)

    eris = _ERIS(self, self.mo_coeff)
    time1 = log.timer('integral transformation', *time1)

    if not getattr(self.fcisolver, 'nevpt_intermediate', None):
        # regular FCI solver
        link_indexa = fci.cistring.gen_linkstr_index(
            range(ncas), self.nelecas[0])
        link_indexb = fci.cistring.gen_linkstr_index(
            range(ncas), self.nelecas[1])
        aaaa = eris['ppaa'][ncore:nocc, ncore:nocc].copy()
        f3ca = _contract4pdm('NEVPTkern_cedf_aedf', aaaa, self.load_ci(),
                             ncas, self.nelecas, (link_indexa, link_indexb))
        f3ac = _contract4pdm('NEVPTkern_aedf_ecdf', aaaa, self.load_ci(),
                             ncas, self.nelecas, (link_indexa, link_indexb))
        dms['f3ca'] = f3ca
        dms['f3ac'] = f3ac
    time1 = log.timer('eri-4pdm contraction', *time1)

    if self.compressed_mps:
        from pyscf.dmrgscf.nevpt_mpi import DMRG_COMPRESS_NEVPT
        compress_kwargs = dict(maxM=self.maxM, root=self.root,
                               nevptsolver=self.nevptsolver, tol=self.tol)
        if self.stored_integral:
            # Stored perturbation integral and read them again.
            # For debugging purpose.
            compress_kwargs['nevpt_integral'] = 'nevpt_perturb_integral'
        perturb_file = DMRG_COMPRESS_NEVPT(self, **compress_kwargs)

        # The context manager closes the file even if a dataset is missing
        # or a read fails.
        with h5py.File(perturb_file, 'r') as fh5:
            e_Si = fh5['Vi/energy'][()]
            # The definition of norm changed.
            # However, there is no need to print out it.
            # Only perturbation energy is wanted.
            norm_Si = fh5['Vi/norm'][()]
            e_Sr = fh5['Vr/energy'][()]
            norm_Sr = fh5['Vr/norm'][()]
        logger.note(self, "Sr (-1)', E = %.14f", e_Sr)
        logger.note(self, "Si (+1)', E = %.14f", e_Si)

    else:
        norm_Sr, e_Sr = Sr(self, self.load_ci(), dms, eris)
        logger.note(self, "Sr (-1)', E = %.14f", e_Sr)
        time1 = log.timer("space Sr (-1)'", *time1)
        norm_Si, e_Si = Si(self, self.load_ci(), dms, eris)
        logger.note(self, "Si (+1)', E = %.14f", e_Si)
        time1 = log.timer("space Si (+1)'", *time1)

    norm_Sijrs, e_Sijrs = Sijrs(self, eris)
    logger.note(self, "Sijrs (0) , E = %.14f", e_Sijrs)
    time1 = log.timer('space Sijrs (0)', *time1)
    norm_Sijr, e_Sijr = Sijr(self, dms, eris)
    logger.note(self, "Sijr (+1) , E = %.14f", e_Sijr)
    time1 = log.timer('space Sijr (+1)', *time1)
    norm_Srsi, e_Srsi = Srsi(self, dms, eris)
    logger.note(self, "Srsi (-1) , E = %.14f", e_Srsi)
    time1 = log.timer('space Srsi (-1)', *time1)
    norm_Srs, e_Srs = Srs(self, dms, eris)
    logger.note(self, "Srs (-2) , E = %.14f", e_Srs)
    time1 = log.timer('space Srs (-2)', *time1)
    norm_Sij, e_Sij = Sij(self, dms, eris)
    logger.note(self, "Sij (+2) , E = %.14f", e_Sij)
    time1 = log.timer('space Sij (+2)', *time1)
    norm_Sir, e_Sir = Sir(self, dms, eris)
    logger.note(self, "Sir (0)' , E = %.14f", e_Sir)
    time1 = log.timer("space Sir (0)'", *time1)

    nevpt_e = e_Sr + e_Si + e_Sijrs + e_Sijr + e_Srsi + e_Srs + e_Sij + e_Sir
    logger.note(self, "Nevpt2 Energy = %.15f", nevpt_e)
    log.timer('SC-NEVPT2', *time0)

    self.e_corr = nevpt_e
    return nevpt_e
def solve_mo1(sscobj, mo_energy=None, mo_coeff=None, mo_occ=None, h1=None,
              s1=None, with_cphf=None):
    '''Solve for the first-order orbitals ``mo1``.

    Args:
        sscobj : object providing ``_scf``, ``mol``, ``mb``, ``cphf``,
            ``nuc_pair``, ``max_cycle_cphf`` and ``conv_tol``.
        mo_energy, mo_coeff, mo_occ : default to the values on
            ``sscobj._scf``.
        h1 : first-order perturbation; built with ``make_h1`` when None.
        s1 : accepted but not used by this function.
        with_cphf : a callable response function, or a truthy flag to
            generate one with ``gen_vind``; a falsy value selects the
            uncoupled estimate ``h1 / e_ai``.  Defaults to ``sscobj.cphf``.

    Returns:
        (mo1, mo_e1); ``mo_e1`` is None in the uncoupled case.
    '''
    # time.clock() was removed in Python 3.8; use process_time when available
    # and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    cput1 = (cpu_clock(), time.time())
    log = logger.Logger(sscobj.stdout, sscobj.verbose)
    if mo_energy is None:
        mo_energy = sscobj._scf.mo_energy
    if mo_coeff is None:
        mo_coeff = sscobj._scf.mo_coeff
    if mo_occ is None:
        mo_occ = sscobj._scf.mo_occ
    if with_cphf is None:
        with_cphf = sscobj.cphf

    mol = sscobj.mol
    mb = sscobj.mb.upper()
    if mb.startswith('ST'):  # Sternheim approximation
        # Keep only the second half of the orbital set.
        nmo = mo_occ.size
        mo_energy = mo_energy[nmo // 2:]
        mo_coeff = mo_coeff[:, nmo // 2:]
        mo_occ = mo_occ[nmo // 2:]

    # The atom list is needed both to build h1 and for the RMB correction.
    # It used to be bound only when h1 was None, so passing h1 together with
    # mb == 'RMB' raised NameError further down.
    use_rmb = (mb == 'RMB')
    atmlst = None
    if h1 is None or use_rmb:
        atmlst = sorted(set(j for i, j in sscobj.nuc_pair))

    if h1 is None:
        h1 = numpy.asarray(make_h1(mol, mo_coeff, mo_occ, atmlst))

    if with_cphf:
        if callable(with_cphf):
            vind = with_cphf
        else:
            vind = gen_vind(sscobj._scf, mo_coeff, mo_occ)
        mo1, mo_e1 = cphf.solve(vind, mo_energy, mo_occ, h1, None,
                                sscobj.max_cycle_cphf, sscobj.conv_tol,
                                verbose=log)
    else:
        e_ai = lib.direct_sum('i-a->ai', mo_energy[mo_occ > 0],
                              mo_energy[mo_occ == 0])
        mo1 = h1 / e_ai
        mo_e1 = None

    # Calculate RMB with approximation
    # |MO1> = Z_RMB |i> + |p> bar{C}_{pi}^1 ~= |p> C_{pi}^1
    # bar{C}_{pi}^1 ~= C_{pi}^1 - <p|Z_RMB|i>
    if use_rmb:
        orbo = mo_coeff[:, mo_occ > 0]
        orbv = mo_coeff[:, mo_occ == 0]
        n4c = mo_coeff.shape[0]
        n2c = n4c // 2
        c = lib.param.LIGHT_SPEED
        # Lower half of the coefficient rows (presumably the small
        # component -- confirm).
        orbvS_T = orbv[n2c:].conj().T
        for ia in atmlst:
            mol.set_rinv_origin(mol.atom_coord(ia))
            a01int = mol.intor('int1e_sa01sp_spinor', 3)
            for k in range(3):
                z_rmb = orbvS_T.dot(a01int[k].conj().T).dot(orbo[n2c:])
                # NOTE(review): mo1 is indexed with the atom id `ia`, not the
                # position of `ia` inside atmlst -- confirm this matches the
                # ordering produced by make_h1 when atmlst is not 0..n-1.
                mo1[ia * 3 + k] -= z_rmb * (.25 / c**2)

    logger.timer(sscobj, 'solving mo1 eqn', *cput1)
    return mo1, mo_e1
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    '''Contract density-matrix intermediates with ERIs into Ioo, Ivv, Ivo, Xvo.

    The contractions are blocked so that only slices of the integrals and of
    the 2-particle intermediates are held in memory; two HDF5 scratch files
    (``fswap`` and, when ``d2`` is None, ``fd2intermediate``) are used.

    Args:
        mycc : CCSD object (``stdout``, ``verbose``, ``max_memory`` are read).
        t1, t2, l1, l2 : amplitudes, forwarded to ``ccsd_rdm``; ``t1.shape``
            supplies ``(nocc, nvir)``.
        eris : integrals in Chemist's notation; built by ``ccsd._ERIS`` if
            None.
        d1 : 4-tuple ``(doo, dov, dvo, dvv)``; computed if None.
        d2 : 8-tuple of 2-particle intermediates; computed out-of-core into a
            temporary file if None.

    Returns:
        (Ioo, Ivv, Ivo, Xvo).  The first three are multiplied by -1 at the
        end, and ``Xvo`` has the sign-flipped ``Ivo`` added.

    NOTE(review): the scratch files are closed by straight-line code only;
    there is no try/finally, so they stay open if a contraction raises.
    '''
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        # dvvov is unpacked here but not referenced again in this function.
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Scratch file for data written in pass 1 and re-read in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    # ---- oooo / ooov contributions (held fully in memory) ----
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T, d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    # Release the large temporaries before the next allocation.
    eris_oooo = eris_ooov = d_oooo = None

    # ---- ovov / ovvo contributions ----
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T, _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Stored for re-use inside the blocked loop below.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # ---- pass 1: blocks over the occupied index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T,
                       d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T,
                       eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            # lib.dot(a, b, alpha, c, beta) is called with c=Ioo / Xvo and
            # beta=1, i.e. the product is accumulated into the existing array.
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T,
                       d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        # Written per block (keyed by istep); read back in pass 2.
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T,
                       eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        # Expand the packed last index pair of ovvv to full square form.
        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T,
                       d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # ---- pass 2: blocks over the packed virtual-pair index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Offsets of rows p0 / p1 in lower-triangular packed storage.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        for i in range(p0, p1):
            # Halve the diagonal (i, i) pair rows.
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # eris_vovv is only ever assigned here (it just binds None).
        eris_vvov = eris_vovv = d_vvvv = None

    # Remove the scratch datasets and release the temporary file.
    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # The 2-particle intermediates were created here, so clean them up.
        for key in fd2intermediate.keys():
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def cis_H(cis, kshift, eris=None):
    """Build full Hamiltonian matrix in the space of single excitation, i.e.
    CIS Hamiltonian.

    Arguments:
        cis {KCIS} -- A KCIS instance
        kshift {int} -- k-shift index. A k-shift vector is an exciton
            momentum. Available k-shift indices depend on the k-point mesh.
            For example, a 2 by 2 by 2 k-point mesh allows at most 8 k-shift
            values, which can be targeted by 0, 1, 2, 3, 4, 5, 6, or 7.

    Keyword Arguments:
        eris {_CIS_ERIS} -- Depending on cis.direct, eris may contain
            4-center (cis.direct=False) or 3-center (cis.direct=True)
            electron repulsion integrals (default: {None})

    Raises:
        MemoryError: raised if there is not enough space to store the full
            Hamiltonian matrix, which scales as Nk^2 O^2 V^2

    Returns:
        2D array -- the Hamiltonian matrix reshaped into (ki,i,a) by (kj,j,b)
    """
    # time.clock() was removed in Python 3.8; use process_time when available
    # and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    cpu0 = (cpu_clock(), time.time())
    log = logger.Logger(cis.stdout, cis.verbose)

    if eris is None:
        eris = cis.ao2mo()
    nkpts = cis.nkpts
    nocc = cis.nocc
    nmo = cis.nmo
    nvir = nmo - nocc
    nov = nocc * nvir
    r_size = nkpts * nov

    # 16 bytes per element, expressed in MB.
    memory_needed = (r_size**2) * 16 / 1e6
    memory_now = lib.current_memory()[0]
    if memory_needed + memory_now >= cis.max_memory:
        raise MemoryError("Not enough memory to store full CIS Hamiltonian")

    kconserv_r = cis.get_kconserv_r(kshift)
    dtype = eris.dtype
    epsilons = [eris.fock[k].diagonal().real for k in range(nkpts)]

    H = np.zeros((nkpts, nkpts, nov, nov), dtype=dtype)

    # <ia|H|jb> <- (esp_a - esp_i) \delta{i,j} \delta{a,b}
    for ki in range(nkpts):
        ka = kconserv_r[ki]
        diag_ia = direct_sum("a-i->ia", epsilons[ka][nocc:],
                             epsilons[ki][:nocc])
        diag_ia = np.ravel(diag_ia)
        np.fill_diagonal(H[ki, ki], diag_ia)

    # <ia|H|jb> <- 2<ja|bi> - <ja|ib>
    if not cis.direct:
        # NOTE(review): unlike the direct branch, no 1/nkpts factor is
        # applied here -- presumably it is already contained in eris.voov /
        # eris.ovov; confirm.
        for ki in range(nkpts):
            ka = kconserv_r[ki]
            for kj in range(nkpts):
                # contribution from 2 <ja|bi> = 2 <aj|ib>
                tmp = 2. * eris.voov[ka, kj, ki].transpose(2, 0, 1, 3)
                # contribution from -<ja|ib>
                tmp -= eris.ovov[kj, ka, ki].transpose(2, 1, 0, 3)
                H[ki, kj] += tmp.reshape(nov, nov)
    else:
        for ki in range(nkpts):
            ka = kconserv_r[ki]
            for kj in range(nkpts):
                kb = kconserv_r[kj]
                # contribution from 2 (ai|jb) = 2 B^L_ai B^L_jb
                tmp = 2. * einsum("Lai,Ljb->iajb",
                                  eris.Lpq_mo[ka, ki][:, nocc:, :nocc],
                                  eris.Lpq_mo[kj, kb][:, :nocc, nocc:])
                # contribution from -(ab|ji) = - B^L_ab B^L_ji
                tmp -= einsum("Lab,Lji->iajb",
                              eris.Lpq_mo[ka, kb][:, nocc:, nocc:],
                              eris.Lpq_mo[kj, ki][:, :nocc, :nocc])
                tmp *= 1. / nkpts
                H[ki, kj] += tmp.reshape(nov, nov)

    # (ki, kj, ia, jb) -> (ki, i, a, kj, j, b) -> square matrix.
    H = H.reshape(nkpts, nkpts, nocc, nvir, nocc, nvir)
    H = H.transpose(0, 2, 3, 1, 4, 5).reshape(r_size, r_size)
    log.timer("build full CIS Hamiltonian", *cpu0)
    return H
def ipccsd_star_contract(eom, ipccsd_evals, ipccsd_evecs, lipccsd_evecs,
                         imds=None):
    """Evaluate the perturbative IP-CCSD* correction for each given root.

    Args:
        eom: EOM object; ``partition`` must be None and ``make_imds`` is
            called when ``imds`` is not supplied.
        ipccsd_evals: IP-CCSD eigenvalue(s).
        ipccsd_evecs: right eigenvector(s), one per eigenvalue.
        lipccsd_evecs: left eigenvector(s), one per eigenvalue.
        imds: optional intermediates providing ``t1``, ``t2`` and ``eris``.

    Returns:
        e_star (list of float):
            The IP-CCSD* energy.

    Notes:
        The user should check the right and left eigenvalues (presumably
        that they agree) before running the perturbative correction.

        The 2hp right amplitudes are assumed to be of the form s^{a }_{ij},
        i.e. the (ia) indices are coupled while the left are assumed to be of
        the form s^{ b}_{ij}, i.e. the (jb) indices are coupled.

    Reference:
        Saeh, Stanton "...energy surfaces of radicals" JCP 111, 8275 (1999)
    """
    assert eom.partition is None
    # The former `cpu1 = cpu0 = (time.clock(), time.time())` was never used
    # and time.clock() no longer exists on Python >= 3.8, so it is gone.
    if imds is None:
        imds = eom.make_imds()
    t1, t2 = imds.t1, imds.t2
    eris = imds.eris
    assert isinstance(eris, gccsd._PhysicistsERIs)
    fock = eris.fock
    nocc, nvir = t1.shape
    nmo = nocc + nvir

    # Only the diagonal Fock elements enter the denominator.
    foo = fock[:nocc, :nocc].diagonal()
    fvv = fock[nocc:, nocc:].diagonal()

    oovv = _cp(eris.oovv)
    ovvv = _cp(eris.ovvv)
    ovov = _cp(eris.ovov)
    ooov = _cp(eris.ooov)
    vooo = _cp(ooov).conj().transpose(3, 2, 1, 0)
    vvvo = _cp(ovvv).conj().transpose(3, 2, 1, 0)
    oooo = _cp(eris.oooo)

    # Create denominator
    eijk = foo[:, None, None] + foo[None, :, None] + foo[None, None, :]
    eab = fvv[:, None] + fvv[None, :]
    eijkab = eijk[:, :, :, None, None] - eab[None, None, None, :, :]

    # Permutation operators
    def pijk(tmp):
        '''P(ijk)'''
        return (tmp + tmp.transpose(1, 2, 0, 3, 4) +
                tmp.transpose(2, 0, 1, 3, 4))

    def pab(tmp):
        '''P(ab)'''
        return tmp - tmp.transpose(0, 1, 2, 4, 3)

    # np.array copies the inputs, so the in-place normalisation below does
    # not modify the caller's vectors.
    ipccsd_evecs = np.atleast_2d(np.array(ipccsd_evecs))
    lipccsd_evecs = np.atleast_2d(np.array(lipccsd_evecs))
    ipccsd_evals = np.atleast_1d(ipccsd_evals)

    e_star = []
    for ip_eval, ip_evec, ip_levec in zip(ipccsd_evals, ipccsd_evecs,
                                          lipccsd_evecs):
        # Enforcing <L|R> = 1
        l1, l2 = vector_to_amplitudes_ip(ip_levec, nmo, nocc)
        r1, r2 = vector_to_amplitudes_ip(ip_evec, nmo, nocc)
        ldotr = np.dot(l1, r1) + 0.5 * np.dot(l2.ravel(), r2.ravel())

        logger.info(eom, 'Left-right amplitude overlap : %14.8e', ldotr)
        if abs(ldotr) < 1e-7:
            logger.warn(
                eom, 'Small %s left-right amplitude overlap. Results '
                'may be inaccurate.', ldotr)

        l1 /= ldotr
        l2 /= ldotr

        # Denominator + eigenvalue(IP-CCSD)
        denom = eijkab + ip_eval
        denom = 1. / denom

        # Left (bra) five-index amplitudes.
        tmp = lib.einsum('ijab,k->ijkab', oovv, l1)
        lijkab = pijk(tmp)
        tmp = -lib.einsum('jima,mkb->ijkab', ooov, l2)
        tmp = pijk(tmp)
        lijkab += pab(tmp)
        tmp = lib.einsum('ieab,jke->ijkab', ovvv, l2)
        lijkab += pijk(tmp)

        # Right (ket) five-index amplitudes.
        tmp = lib.einsum('mbke,m->bke', ovov, r1)
        tmp = lib.einsum('bke,ijae->ijkab', tmp, t2)
        tmp = pijk(tmp)
        rijkab = -pab(tmp)
        tmp = lib.einsum('mnjk,n->mjk', oooo, r1)
        tmp = lib.einsum('mjk,imab->ijkab', tmp, t2)
        rijkab += pijk(tmp)
        tmp = lib.einsum('amij,mkb->ijkab', vooo, r2)
        tmp = pijk(tmp)
        rijkab -= pab(tmp)
        tmp = lib.einsum('baei,jke->ijkab', vvvo, r2)
        rijkab += pijk(tmp)

        deltaE = (1. / 12) * lib.einsum('ijkab,ijkab,ijkab', lijkab, rijkab,
                                        denom)
        deltaE = deltaE.real
        logger.info(eom, "Exc. energy, delta energy = %16.12f, %16.12f",
                    ip_eval + deltaE, deltaE)
        e_star.append(ip_eval + deltaE)
    return e_star