def trans_e1_incore(eri_ao, mo, ncore, ncas):
    '''Build the j_pc, k_pc, ppaa and papa arrays from in-memory AO integrals.

    The AO integrals are first passed through
    ``pyscf.ao2mo.incore.half_e1`` with the orbital pair
    ``(mo, mo[:, :nocc])`` (``nocc = ncore + ncas``) and ``compact=False``.
    The remaining index pair of selected rows is then contracted with ``mo``
    by ``_ao2mo.nr_e2_``.

    Args:
        eri_ao : AO integrals, forwarded unchanged to ``half_e1``.
        mo : 2D ndarray of orbital coefficients; ``mo.shape[1]`` is nmo.
        ncore : int
            Offset of the first active orbital.
        ncas : int
            Number of active orbitals.

    Returns:
        Tuple ``(j_pc, k_pc, ppaa, papa)`` with shapes ``(nmo, ncore)``,
        ``(nmo, ncore)``, ``(nmo, nmo, ncas, ncas)`` and
        ``(nmo, ncas, nmo, ncas)``.
    '''
    norb = mo.shape[1]
    nocc = ncore + ncas
    active = slice(ncore, nocc)
    shape_pp = (0, norb, 0, norb)
    shape_pa = (0, norb, ncore, ncas)

    half = pyscf.ao2mo.incore.half_e1(eri_ao, (mo, mo[:, :nocc]),
                                      compact=False)
    half = half.reshape(norb, nocc, -1)

    # (active, active | p, p), then transposed into (p, p | active, active).
    aapp = numpy.empty((ncas, ncas, norb, norb))
    for a in range(ncas):
        _ao2mo.nr_e2_(half[ncore + a, active], mo, shape_pp,
                      aosym="s4", mosym="s1", vout=aapp[a])
    ppaa = pyscf.lib.transpose(aapp.reshape(ncas * ncas, -1))
    ppaa = ppaa.reshape(norb, norb, ncas, ncas)
    aapp = None  # drop the reference before the next large allocation

    papa = numpy.empty((norb, ncas, norb, ncas))
    for p in range(norb):
        _ao2mo.nr_e2_(half[p, active], mo, shape_pa,
                      aosym="s4", mosym="s1", vout=papa[p])

    # Row c of j_cp is the diagonal of the fully transformed (c, c | p, p).
    scratch = numpy.empty((norb, norb))
    j_cp = numpy.zeros((ncore, norb))
    k_pc = numpy.zeros((norb, ncore))
    for c in range(ncore):
        _ao2mo.nr_e2_(half[c, c:c + 1], mo, shape_pp,
                      aosym="s4", mosym="s1", vout=scratch)
        j_cp[c] = scratch.diagonal()
    j_pc = j_cp.T.copy()

    # Row p of k_pc is the diagonal over c of (p, c | p, c).
    scratch = numpy.empty((ncore, ncore))
    for p in range(norb):
        _ao2mo.nr_e2_(half[p, :ncore], mo, (p, 1, 0, ncore),
                      aosym="s4", mosym="s1", vout=scratch)
        k_pc[p] = scratch.diagonal()

    return j_pc, k_pc, ppaa, papa
def _trans_aapp_(mo, ncore, ncas, fload, ao_loc=None):
    '''Assemble the aapp, appa and japcv arrays one active orbital at a time.

    For each active index ``i`` the block returned by ``fload(ncore+i)`` is
    transformed by ``_ao2mo.nr_e2_`` with ``mosym='s2'``. Every row of the
    result is then expanded into a full ``(nmo, nmo)`` matrix with
    ``NPdunpack_tril`` before the required slices are copied out.

    Args:
        mo : 2D ndarray of orbital coefficients; ``mo.shape[1]`` is nmo.
        ncore : int
            Offset of the first active orbital.
        ncas : int
            Number of active orbitals.
        fload : callable
            ``fload(k)`` supplies the input block for orbital ``k``.
        ao_loc : forwarded to ``_ao2mo.nr_e2_``.

    Returns:
        Tuple ``(aapp, appa, japcv)`` with shapes ``(ncas, ncas, nmo, nmo)``,
        ``(ncas, nmo, nmo, ncas)`` and ``(ncas, nmo, ncore, nmo-ncore)``.
    '''
    nmo = mo.shape[1]
    nocc = ncore + ncas
    c_dim = ctypes.c_int(nmo)
    c_flag = ctypes.c_int(1)
    unpack_tril = pyscf.lib.numpy_helper._np_helper.NPdunpack_tril
    full_shape = (0, nmo, 0, nmo)

    aapp = numpy.empty((ncas, ncas, nmo, nmo))
    appa = numpy.empty((ncas, nmo, nmo, ncas))
    japcv = numpy.empty((ncas, nmo, ncore, nmo - ncore))
    square = numpy.empty((nmo, nmo, nmo))

    for a in range(ncas):
        packed = _ao2mo.nr_e2_(fload(ncore + a), mo, full_shape,
                               aosym='s4', mosym='s2', ao_loc=ao_loc)
        for p in range(nmo):
            unpack_tril(c_dim,
                        packed[p].ctypes.data_as(ctypes.c_void_p),
                        square[p].ctypes.data_as(ctypes.c_void_p),
                        c_flag)
        aapp[a] = square[ncore:nocc]
        appa[a] = square[:, :, ncore:nocc]
        # japcv = apcv * 4 - acpv.transpose(0,2,1,3) - avcp.transpose(0,3,2,1)
        direct = square[:, :ncore, ncore:] * 4
        swap_cp = square[:ncore, :, ncore:].transpose(1, 0, 2)
        swap_vp = square[ncore:, :ncore, :].transpose(2, 1, 0)
        japcv[a] = direct - swap_cp - swap_vp
    return aapp, appa, japcv
def get_vind(self, zs):
    '''Apply the symmetrised response operator to a batch of trial vectors.

    With ``eai`` the virtual-minus-occupied orbital energy differences and
    ``dai = sqrt(eai)``, each returned row equals
    ``dai * (eai * dai * z + V[dai * z])``. ``V`` is the xc-kernel term from
    ``_contract_xc_kernel``, plus twice the Coulomb term when
    ``self.singlet`` is true, transformed to the virtual-occupied block.

    Args:
        zs : sequence of trial vectors, each reshapeable to ``(nvir, nocc)``.

    Returns:
        ndarray of shape ``(len(zs), nvir*nocc)``.
    '''
    mol = self.mol
    coeff = self._scf.mo_coeff
    energy = self._scf.mo_energy
    nao, nmo = coeff.shape
    nocc = (self._scf.mo_occ>0).sum()
    nvir = nmo - nocc
    vir_orb = coeff[:, nocc:]
    occ_orb = coeff[:, :nocc]

    gap = pyscf.lib.direct_sum('a-i->ai', energy[nocc:], energy[:nocc])
    root_gap = numpy.sqrt(gap).ravel()

    nvec = len(zs)
    dens = numpy.empty((nvec, nao, nao))
    for k, vec in enumerate(zs):
        scaled = (root_gap * vec).reshape(nvir, nocc)
        half = numpy.dot(numpy.dot(vir_orb, scaled), occ_orb.T)
        # +cc for A+B and K_{ai,jb} in A == K_{ai,bj} in B
        dens[k] = half + half.T

    used = pyscf.lib.current_memory()[0]
    avail = max(2000, self.max_memory*.9-used)
    pot_ao = _contract_xc_kernel(self, self._scf.xc, dens,
                                 singlet=self.singlet, max_memory=avail)
    if self.singlet:
        coulomb = self._scf.get_j(mol, dens, hermi=1)
        pot_ao += coulomb * 2

    pot_vo = _ao2mo.nr_e2_(pot_ao, coeff, (nocc, nvir, 0, nocc))
    pot_vo = pot_vo.reshape(-1, nvir*nocc)

    gap_root_gap = gap.ravel() * root_gap
    for k, vec in enumerate(zs):
        # numpy.sqrt(eai) * (eai*dai*z + v1vo)
        pot_vo[k] += gap_root_gap * vec
        pot_vo[k] *= root_gap
    return pot_vo.reshape(nvec, -1)
def get_vind(self, zs):
    '''Compute Ax for a batch of trial vectors.

    Each trial vector is back-transformed to an AO matrix
    ``orbv . z . orbo^T``. The J and K matrices come from
    ``self._scf.get_jk`` and are combined as ``2J - K`` when
    ``self.singlet`` is true and ``-K`` otherwise. The result is transformed
    to the virtual-occupied block, and the orbital energy difference times
    ``z`` is added.

    Args:
        zs : sequence of trial vectors, each reshapeable to ``(nvir, nocc)``.

    Returns:
        ndarray of shape ``(len(zs), nvir*nocc)``.
    '''
    coeff = self._scf.mo_coeff
    energy = self._scf.mo_energy
    nao, nmo = coeff.shape
    nocc = (self._scf.mo_occ>0).sum()
    nvir = nmo - nocc
    vir_orb = coeff[:, nocc:]
    occ_orb = coeff[:, :nocc]

    nvec = len(zs)
    dens = numpy.empty((nvec, nao, nao))
    for k, vec in enumerate(zs):
        amp = vec.reshape(nvir, nocc)
        dens[k] = numpy.dot(numpy.dot(vir_orb, amp), occ_orb.T)

    coulomb, exchange = self._scf.get_jk(self.mol, dens, hermi=0)
    if self.singlet:
        pot_ao = coulomb*2 - exchange
    else:
        pot_ao = -exchange

    pot_vo = _ao2mo.nr_e2_(pot_ao, coeff, (nocc, nvir, 0, nocc))
    pot_vo = pot_vo.reshape(-1, nvir*nocc)

    gap = pyscf.lib.direct_sum('a-i->ai', energy[nocc:], energy[:nocc])
    gap = gap.ravel()
    for k, vec in enumerate(zs):
        pot_vo[k] += gap * vec
    return pot_vo.reshape(nvec, -1)
def _trans(mo, ncore, ncas, fload, cvcv=None, ao_loc=None):
    '''Build the ppaa, papa, pacv and cvcv arrays from blocks given by fload.

    ``fload(k, k+1)`` is called once for each of the first ``ncore``
    orbitals and once for each of the ``ncas`` orbitals after them. The
    loaded block is transformed with ``_ao2mo.nr_e2_`` for the index
    windows ``(0, ncore, nocc, nvir)``, ``(0, nmo, ncore, ncas)`` and
    ``(0, nmo, 0, nmo)``, and the results are scattered into the outputs.

    Args:
        mo : 2D ndarray of orbital coefficients, shape ``(nao, nmo)``.
        ncore : int
            Number of leading orbitals handled by the first loop.
        ncas : int
            Number of orbitals handled by the second loop.
        fload : callable
            ``fload(start, stop)`` returns the block for orbital ``start``.
        cvcv : ndarray, optional
            Array of shape ``(ncore*nvir, ncore*nvir)`` that is filled in
            place; a zero array is allocated when omitted.
        ao_loc : forwarded to ``_ao2mo.nr_e2_``.

    Returns:
        Tuple ``(ppaa, papa, pacv, cvcv)`` with shapes
        ``(nmo, nmo, ncas, ncas)``, ``(nmo, ncas, nmo, ncas)``,
        ``(nmo, ncas, ncore, nvir)`` and ``(ncore*nvir, ncore*nvir)``.
    '''
    _, nmo = mo.shape
    nocc = ncore + ncas
    nvir = nmo - nocc
    nav = nmo - ncore
    ncv = ncore * nvir
    if cvcv is None:
        cvcv = numpy.zeros((ncv, ncv))

    shape_cv = (0, ncore, nocc, nvir)
    shape_pa = (0, nmo, ncore, ncas)
    shape_pp = (0, nmo, 0, nmo)
    common = dict(aosym='s4', mosym='s1', ao_loc=ao_loc)

    pacv = numpy.empty((nmo, ncas, ncv))
    aapp = numpy.empty((ncas, ncas, nmo*nmo))
    papa = numpy.empty((nmo, ncas, nmo*ncas))
    # Scratch outputs reused on every iteration.
    vcv = numpy.empty((nav, ncv))
    apa = numpy.empty((ncas, nmo*ncas))
    vpa = numpy.empty((nav, nmo*ncas))
    app = numpy.empty((ncas, nmo*nmo))

    for c in range(ncore):
        block = fload(c, c+1)
        _ao2mo.nr_e2_(block, mo, shape_cv, out=vcv, **common)
        cvcv[c*nvir:(c+1)*nvir] = vcv[ncas:]
        pacv[c] = vcv[:ncas]

        _ao2mo.nr_e2_(block[:ncas], mo, shape_pa, out=apa, **common)
        papa[c] = apa

    for a in range(ncas):
        block = fload(ncore+a, ncore+a+1)
        _ao2mo.nr_e2_(block, mo, shape_cv, out=vcv, **common)
        pacv[ncore:, a] = vcv

        _ao2mo.nr_e2_(block, mo, shape_pa, out=vpa, **common)
        papa[ncore:, a] = vpa

        _ao2mo.nr_e2_(block[:ncas], mo, shape_pp, out=app, **common)
        aapp[a] = app

    ppaa = pyscf.lib.transpose(aapp.reshape(ncas**2, -1))
    ppaa = ppaa.reshape(nmo, nmo, ncas, ncas)
    papa = papa.reshape(nmo, ncas, nmo, ncas)
    pacv = pacv.reshape(nmo, ncas, ncore, nvir)
    return (ppaa, papa, pacv, cvcv)
def get_vind(self, xys):
    '''Apply the block operator below to a batch of (X, Y) vectors.

        [ A  B][X]
        [-B -A][Y]

    An exchange term scaled by the hybrid coefficient is included only when
    that coefficient exceeds 1e-10 in magnitude. The Coulomb term is
    included only when ``self.singlet`` is true. The xc-kernel term is
    always added.

    Args:
        xys : sequence of vectors, each reshapeable to ``(2, nvir, nocc)``
            holding X followed by Y.

    Returns:
        ndarray of shape ``(len(xys), 2*nvir*nocc)``; the first half of each
        row is ``AX + BY``, the second half is ``-(AY + BX)``.
    '''
    mol = self.mol
    coeff = self._scf.mo_coeff
    energy = self._scf.mo_energy
    nao, nmo = coeff.shape
    nocc = (self._scf.mo_occ>0).sum()
    nvir = nmo - nocc
    vir_orb = coeff[:, nocc:]
    occ_orb = coeff[:, :nocc]

    nvec = len(xys)
    dens = numpy.empty((nvec*2, nao, nao))
    for k in range(nvec):
        xmat, ymat = xys[k].reshape(2, nvir, nocc)
        dm_x = numpy.dot(numpy.dot(vir_orb, xmat), occ_orb.T)
        dm_y = numpy.dot(numpy.dot(vir_orb, ymat), occ_orb.T)
        dens[k] = dm_x + dm_y.T  # AX + BY
        dens[k+nvec] = dens[k].T  # = dmy + dmx.T  # AY + BX

    hyb = self._scf._numint.hybrid_coeff(self._scf.xc, spin=(mol.spin>0)+1)

    if abs(hyb) > 1e-10:
        coulomb, exchange = self._scf.get_jk(self.mol, dens, hermi=0)
        if self.singlet:
            pot = coulomb * 2 - hyb * exchange
        else:
            pot = -hyb * exchange
    elif self.singlet:
        coulomb = self._scf.get_j(self.mol, dens, hermi=1)
        pot = coulomb * 2
    else:
        pot = numpy.zeros((nvec*2, nao, nao))

    used = pyscf.lib.current_memory()[0]
    avail = max(2000, self.max_memory*.9-used)
    kernel = _contract_xc_kernel(self, self._scf.xc, dens[:nvec],
                                 singlet=self.singlet, max_memory=avail)
    # The same kernel contribution is added to both halves.
    pot[:nvec] += kernel
    pot[nvec:] += kernel

    pot = _ao2mo.nr_e2_(pot, coeff, (nocc, nvir, 0, nocc))
    pot = pot.reshape(-1, nvir*nocc)

    gap = pyscf.lib.direct_sum('a-i->ai', energy[nocc:], energy[:nocc])
    gap = gap.ravel()
    for k, vec in enumerate(xys):
        xvec, yvec = vec.reshape(2, -1)
        pot[k] += gap * xvec  # AX
        pot[k+nvec] += gap * yvec  # AY

    stacked = numpy.hstack((pot[:nvec], -pot[nvec:]))
    return stacked.reshape(nvec, -1)
def _trans_aapp_(mo, ncore, ncas, fload, ao_loc=None):
    '''Two-orbital-set version of the aapp/appa/japcv assembly.

    ``mo`` and ``ncore`` are indexed with ``[0]`` and ``[1]`` for the two
    orbital sets. For each active index ``i`` the block
    ``fload(ncore[0]+i)`` is transformed once with ``mo[0]`` and once with
    ``mo[1]``. Each transformed row is expanded to a full ``(nmo, nmo)``
    matrix with ``NPdunpack_tril`` before slices are copied out.

    Args:
        mo : pair of 2D ndarrays; ``mo[0].shape[1]`` is nmo.
        ncore : pair of ints, one offset per orbital set.
        ncas : int
            Number of active orbitals.
        fload : callable
            ``fload(k)`` supplies the input block for orbital ``k``.
        ao_loc : forwarded to ``_ao2mo.nr_e2_``.

    Returns:
        Tuple ``(aapp, aaPP, appa, apPA, japcv, apCV)``. Lower-case names are
        built with ``mo[0]`` and upper-case ones with ``mo[1]``.
    '''
    nmo = mo[0].shape[1]
    nocc = (ncore[0] + ncas, ncore[1] + ncas)
    c_dim = ctypes.c_int(nmo)
    c_flag = ctypes.c_int(1)
    unpack_tril = pyscf.lib.numpy_helper._np_helper.NPdunpack_tril
    full_shape = (0, nmo, 0, nmo)

    japcv = numpy.empty((ncas, nmo, ncore[0], nmo-ncore[0]))
    aapp = numpy.empty((ncas, ncas, nmo, nmo))
    aaPP = numpy.empty((ncas, ncas, nmo, nmo))
    appa = numpy.empty((ncas, nmo, nmo, ncas))
    apPA = numpy.empty((ncas, nmo, nmo, ncas))
    apCV = numpy.empty((ncas, nmo, ncore[1], nmo-ncore[1]))
    square = numpy.empty((nmo, nmo, nmo))

    def fill_square(coeff, orb):
        # Transform the block for `orb` with `coeff` and unpack each row.
        packed = _ao2mo.nr_e2_(fload(orb), coeff, full_shape,
                               aosym='s4', mosym='s2', ao_loc=ao_loc)
        for p in range(nmo):
            unpack_tril(c_dim,
                        packed[p].ctypes.data_as(ctypes.c_void_p),
                        square[p].ctypes.data_as(ctypes.c_void_p),
                        c_flag)

    for a in range(ncas):
        fill_square(mo[0], ncore[0]+a)
        aapp[a] = square[ncore[0]:nocc[0]]
        appa[a] = square[:, :, ncore[0]:nocc[0]]
        # japcp = avcp * 2 - acpv.transpose(0,2,1,3) - avcp.transpose(0,3,2,1)
        direct = square[:, :ncore[0], ncore[0]:] * 2
        swap_cp = square[:ncore[0], :, ncore[0]:].transpose(1, 0, 2)
        swap_vp = square[ncore[0]:, :ncore[0], :].transpose(2, 1, 0)
        japcv[a] = direct - swap_cp - swap_vp

        fill_square(mo[1], ncore[0]+a)
        aaPP[a] = square[ncore[0]:nocc[0]]
        apPA[a] = square[:, :, ncore[1]:nocc[1]]
        apCV[a] = square[:, :ncore[1], ncore[1]:]

    return aapp, aaPP, appa, apPA, japcv, apCV
def _trans_cvcv_(mo, ncore, ncas, fload, ao_loc=None):
    '''Accumulate vj*2-vk, j_cp, k_cp and jcvcv over the first ncore orbitals.

    For each ``i < ncore`` the block ``fload(i)`` is transformed twice with
    ``_ao2mo.nr_e2_``. Its rows ``ncore:nmo`` go into ``vcp``. Its rows
    ``:ncore`` are transformed in place and then unpacked into ``cpp``.

    Args:
        mo : 2D ndarray of orbital coefficients; ``mo.shape[1]`` is nmo.
        ncore : int
            Number of leading orbitals looped over.
        ncas : int
            Not used in this function.
        fload : callable
            ``fload(i)`` returns a writable block with at least nmo rows;
            its first ``ncore`` rows are overwritten.
        ao_loc : forwarded to ``_ao2mo.nr_e2_``.

    Returns:
        Tuple ``(vj*2-vk, j_cp, k_cp, jcvcv)`` with shapes ``(nmo, nmo)``,
        ``(ncore, nmo)``, ``(ncore, nmo)`` and
        ``(ncore, nmo-ncore, ncore, nmo-ncore)``.
    '''
    nmo = mo.shape[1]
    c_nmo = ctypes.c_int(nmo)
    funpack = pyscf.lib.numpy_helper._np_helper.NPdunpack_tril
    jcvcv = numpy.zeros((ncore,nmo-ncore,ncore,nmo-ncore))
    # Scratch arrays, refilled on every iteration of the loop below.
    vcp = numpy.empty((nmo-ncore,ncore,nmo))
    cpp = numpy.empty((ncore,nmo,nmo))
    vj = numpy.zeros((nmo,nmo))
    vk = numpy.zeros((nmo,nmo))
    j_cp = numpy.zeros((ncore,nmo))
    k_cp = numpy.zeros((ncore,nmo))
    for i in range(ncore):
        buf = fload(i)
        # Rows ncore:nmo of the block, transformed into vcp.
        klshape = (0, ncore, 0, nmo)
        _ao2mo.nr_e2_(buf[ncore:nmo], mo, klshape,
                      aosym='s4', mosym='s1', vout=vcp, ao_loc=ao_loc)
        vk[ncore:] += vcp[:,i]
        k_cp[i,ncore:] = vcp[:,i,ncore:].diagonal()

        # Rows :ncore are transformed IN PLACE (vout aliases the input
        # slice); buf[:ncore] holds packed output afterwards, so this call
        # must stay after the one above.
        klshape = (0, nmo, 0, nmo)
        _ao2mo.nr_e2_(buf[:ncore], mo, klshape,
                      aosym='s4', mosym='s2', vout=buf[:ncore], ao_loc=ao_loc)
        # Expand every packed row into a full (nmo, nmo) matrix in cpp.
        for j in range(ncore):
            funpack(c_nmo, buf[j].ctypes.data_as(ctypes.c_void_p),
                    cpp[j].ctypes.data_as(ctypes.c_void_p), ctypes.c_int(1))
        vj += cpp[i]
        j_cp[i] = cpp[i].diagonal()
        vk[:ncore] += cpp[:,i]
        k_cp[i,:ncore] = cpp[:,i,:ncore].diagonal()

        #jcvcv = cvcv * 4 - cvcv.transpose(0,3,2,1) - ccvv.transpose(0,2,1,3)
        jcvcv[i] = vcp[:,:,ncore:] * 4 \
                 - vcp[:,:,ncore:].transpose(2,1,0) \
                 - cpp[:,ncore:,ncore:].transpose(1,0,2)
    return vj*2-vk, j_cp, k_cp, jcvcv
def _trans_cvcv_(mo, ncore, ncas, fload, ao_loc=None):
    '''Two-orbital-set version: build jc_pp, jc_PP, kc_pp, jcvcv and cvCV.

    ``mo`` and ``ncore`` are indexed with ``[0]`` and ``[1]`` for the two
    orbital sets. The loop runs over the first ``ncore[0]`` orbitals; each
    block ``fload(i)`` is transformed with both ``mo[1]`` and ``mo[0]``.

    Args:
        mo : pair of 2D ndarrays; ``mo[0].shape[1]`` is nmo.
        ncore : pair of ints, one count per orbital set.
        ncas : int
            Not used in this function.
        fload : callable
            ``fload(i)`` returns a writable block with at least nmo rows;
            its first ``ncore[0]`` rows are overwritten.
        ao_loc : forwarded to ``_ao2mo.nr_e2_``.

    Returns:
        Tuple ``(jc_pp, jc_PP, kc_pp, jcvcv, cvCV)`` with shapes
        ``(ncore[0], nmo, nmo)``, ``(nmo, nmo)``, ``(ncore[0], nmo, nmo)``,
        ``(ncore[0], nmo-ncore[0], ncore[0], nmo-ncore[0])`` and
        ``(ncore[0], nmo-ncore[0], ncore[1], nmo-ncore[1])``.
    '''
    nmo = mo[0].shape[1]
    c_nmo = ctypes.c_int(nmo)
    funpack = pyscf.lib.numpy_helper._np_helper.NPdunpack_tril
    jc_pp = numpy.empty((ncore[0],nmo,nmo))
    jc_PP = numpy.zeros((nmo,nmo))
    kc_pp = numpy.empty((ncore[0],nmo,nmo))
    jcvcv = numpy.zeros((ncore[0],nmo-ncore[0],ncore[0],nmo-ncore[0]))
    cvCV = numpy.empty((ncore[0],nmo-ncore[0],ncore[1],nmo-ncore[1]))
    # Scratch arrays, refilled on every iteration of the loop below.
    vcp = numpy.empty((nmo-ncore[0],ncore[0],nmo))
    cpp = numpy.empty((ncore[0],nmo,nmo))
    for i in range(ncore[0]):
        buf = fload(i)
        # Rows ncore[0]:nmo transformed with the second orbital set.
        klshape = (0, ncore[1], ncore[1], nmo-ncore[1])
        _ao2mo.nr_e2_(buf[ncore[0]:nmo], mo[1], klshape,
                      aosym='s4', mosym='s1', vout=cvCV[i], ao_loc=ao_loc)

        # Row i transformed with the second orbital set, summed over i.
        klshape = (0, nmo, 0, nmo)
        tmp = _ao2mo.nr_e2_(buf[i:i+1], mo[1], klshape,
                            aosym='s4', mosym='s1', ao_loc=ao_loc)
        jc_PP += tmp.reshape(nmo,nmo)

        klshape = (0, ncore[0], 0, nmo)
        _ao2mo.nr_e2_(buf[ncore[0]:nmo], mo[0], klshape,
                      aosym='s4', mosym='s1', vout=vcp, ao_loc=ao_loc)
        kc_pp[i,ncore[0]:] = vcp[:,i]

        # Rows :ncore[0] are transformed IN PLACE (vout aliases the input
        # slice), so every read of the untransformed buf above must come
        # before this call.
        klshape = (0, nmo, 0, nmo)
        _ao2mo.nr_e2_(buf[:ncore[0]], mo[0], klshape,
                      aosym='s4', mosym='s2', vout=buf[:ncore[0]], ao_loc=ao_loc)
        # Expand every packed row into a full (nmo, nmo) matrix in cpp.
        for j in range(ncore[0]):
            funpack(c_nmo, buf[j].ctypes.data_as(ctypes.c_void_p),
                    cpp[j].ctypes.data_as(ctypes.c_void_p), ctypes.c_int(1))
        jc_pp[i] = cpp[i]
        kc_pp[i,:ncore[0]] = cpp[:,i]

        #jcvcv = cvcv * 2 - cvcv.transpose(2,1,0,3) - ccvv.transpose(0,2,1,3)
        jcvcv[i] = vcp[:,:,ncore[0]:] * 2 \
                 - vcp[:,:,ncore[0]:].transpose(2,1,0) \
                 - cpp[:,ncore[0]:,ncore[0]:].transpose(1,0,2)
    return jc_pp, jc_PP, kc_pp, jcvcv, cvCV
def get_vind(self, zs):
    '''Compute Ax

    Each trial vector is back-transformed to an AO matrix
    ``orbv . z . orbo^T``. The xc-kernel term from ``_contract_xc_kernel``
    is always part of the AO response. When the hybrid coefficient exceeds
    1e-10 in magnitude, ``-hyb * K`` is added, plus ``2 J`` when
    ``self.singlet`` is true. Otherwise only the Coulomb term is added, and
    only when ``self.singlet`` is true. The result is transformed to the
    virtual-occupied block and the orbital energy difference times ``z`` is
    added.

    Args:
        zs : sequence of trial vectors, each reshapeable to ``(nvir, nocc)``.

    Returns:
        ndarray of shape ``(len(zs), nvir*nocc)``.
    '''
    mol = self.mol
    mo_coeff = self._scf.mo_coeff
    mo_energy = self._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = (self._scf.mo_occ>0).sum()
    nvir = nmo - nocc
    orbv = mo_coeff[:,nocc:]
    orbo = mo_coeff[:,:nocc]
    nz = len(zs)
    dmvo = numpy.empty((nz,nao,nao))
    for i, z in enumerate(zs):
        dmvo[i] = reduce(numpy.dot, (orbv, z.reshape(nvir,nocc), orbo.T))

    x_code, c_code = pyscf.dft.vxc.parse_xc_name(self._scf.xc)
    hyb = self._scf._numint.hybrid_coeff(x_code, spin=(mol.spin>0)+1)
    mem_now = pyscf.lib.current_memory()[0]
    max_memory = max(2000, self.max_memory*.9-mem_now)
    v1ao = _contract_xc_kernel(self, x_code, c_code, dmvo,
                               singlet=self.singlet, max_memory=max_memory)

    eai = pyscf.lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
    eai = eai.ravel()

    if abs(hyb) > 1e-10:
        vj, vk = self._scf.get_jk(mol, dmvo, hermi=0)
        if self.singlet:
            v1ao += vj*2 - hyb * vk
        else:
            # Accumulate rather than assign: v1ao already holds the
            # xc-kernel term, which must be kept alongside the exchange.
            v1ao -= hyb * vk
    else:
        if self.singlet:
            # J of the symmetrised density; no extra factor of 2 is applied.
            dm = dmvo + dmvo.transpose(0,2,1)
            vj = self._scf.get_j(mol, dm, hermi=1)
            v1ao += vj

    v1vo = _ao2mo.nr_e2_(v1ao, mo_coeff, (nocc,nvir,0,nocc)).reshape(-1,nvir*nocc)
    for i, z in enumerate(zs):
        v1vo[i] += eai * z
    return v1vo.reshape(nz,-1)
def get_vind(self, xys):
    '''Apply the block operator below to a batch of (X, Y) vectors.

        [ A  B][X]
        [-B -A][Y]

    The two-electron part is ``2J - K`` when ``self.singlet`` is true and
    ``-K`` otherwise, with J and K taken from ``self._scf.get_jk``.

    Args:
        xys : sequence of vectors, each reshapeable to ``(2, nvir, nocc)``
            holding X followed by Y.

    Returns:
        ndarray of shape ``(len(xys), 2*nvir*nocc)``; the first half of each
        row is ``AX + BY``, the second half is ``-(AY + BX)``.
    '''
    coeff = self._scf.mo_coeff
    energy = self._scf.mo_energy
    nao, nmo = coeff.shape
    nocc = (self._scf.mo_occ>0).sum()
    nvir = nmo - nocc
    vir_orb = coeff[:, nocc:]
    occ_orb = coeff[:, :nocc]

    nvec = len(xys)
    dens = numpy.empty((nvec*2, nao, nao))
    for k in range(nvec):
        xmat, ymat = xys[k].reshape(2, nvir, nocc)
        dm_x = numpy.dot(numpy.dot(vir_orb, xmat), occ_orb.T)
        dm_y = numpy.dot(numpy.dot(vir_orb, ymat), occ_orb.T)
        dens[k] = dm_x + dm_y.T  # AX + BY
        dens[k+nvec] = dens[k].T  # = dmy + dmx.T  # AY + BX

    coulomb, exchange = self._scf.get_jk(self.mol, dens, hermi=0)
    if self.singlet:
        pot = coulomb*2 - exchange
    else:
        pot = -exchange

    pot = _ao2mo.nr_e2_(pot, coeff, (nocc, nvir, 0, nocc))
    pot = pot.reshape(-1, nvir*nocc)

    gap = pyscf.lib.direct_sum('a-i->ai', energy[nocc:], energy[:nocc])
    gap = gap.ravel()
    for k, vec in enumerate(xys):
        xvec, yvec = vec.reshape(2, -1)
        pot[k] += gap * xvec  # AX
        pot[k+nvec] += gap * yvec  # AY

    stacked = numpy.hstack((pot[:nvec], -pot[nvec:]))
    return stacked.reshape(nvec, -1)
def general(mol, mo_coeffs, erifile, auxbasis='weigend+etb', dataname='eri_mo',
            tmpdir=None, int3c='cint3c2e_sph', aosym='s2ij',
            int2c='cint2c2e_sph', comp=1, max_memory=2000, ioblk_size=256,
            verbose=0, compact=True):
    ''' Transform ij of (ij|L) to MOs.

    The AO-basis tensor is first written to a temporary HDF5 file by
    ``cholesky_eri_b``. It is then read back in row blocks, transformed with
    ``_ao2mo.nr_e2_`` and stored in ``erifile`` under ``dataname``.

    Args:
        mol : object providing ``stdout``; forwarded to ``cholesky_eri_b``
            and ``incore.format_aux_basis``.
        mo_coeffs : sequence of at least two 2D ndarrays
            Coefficients for the i and j indices.
        erifile : str
            Path of the output HDF5 file. An existing HDF5 file is opened
            for update and any dataset called ``dataname`` is replaced;
            otherwise the file is created.

    Kwargs:
        auxbasis, int3c, int2c : forwarded to ``cholesky_eri_b``.
        dataname : str
            Name of the dataset written to ``erifile``.
        tmpdir : str
            Directory for the temporary file.
        aosym : str
            Must be 's1' or 's2ij'.
        comp : int
            Must be 1.
        max_memory : not used in this function.
        ioblk_size : float or int
            Size (in MB) used to choose the number of rows per block.
        verbose : int or logger.Logger
        compact : bool
            When True, and the two coefficient sets are identical and
            ``aosym`` is not 's1', the ij pair is stored in packed
            (``nmoi*(nmoi+1)//2``) form.

    Returns:
        ``erifile``
    '''
    assert(aosym in ('s1', 's2ij'))
    assert(comp == 1)
    time0 = (time.clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    cholesky_eri_b(mol, swapfile.name, auxbasis, dataname,
                   int3c, aosym, int2c, comp, ioblk_size, verbose=log)
    fswap = h5py.File(swapfile.name, 'r')
    time1 = log.timer('AO->MO eri transformation 1 pass', *time0)

    ijsame = compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1])
    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nao = mo_coeffs[0].shape[0]
    auxmol = incore.format_aux_basis(mol, auxbasis)
    naoaux = auxmol.nao_nr()
    aosym = _stand_sym_code(aosym)
    if aosym == 's1':
        nao_pair = nao * nao
        aosym_as_nr_e2 = 's1'
    else:
        nao_pair = nao * (nao+1) // 2
        aosym_as_nr_e2 = 's2kl'

    if compact and ijsame and aosym != 's1':
        log.debug('i-mo == j-mo')
        ijmosym = 's2'
        nij_pair = nmoi*(nmoi+1) // 2
        moij = numpy.asarray(mo_coeffs[0], order='F')
        ijshape = (0, nmoi, 0, nmoi)
    else:
        ijmosym = 's1'
        nij_pair = nmoi*nmoj
        moij = numpy.asarray(numpy.hstack((mo_coeffs[0],mo_coeffs[1])),
                             order='F')
        ijshape = (0, nmoi, nmoi, nmoj)

    if h5py.is_hdf5(erifile):
        # Open explicitly for update: the implicit default mode is read-only
        # in h5py >= 3, which would make the del/create_dataset below fail.
        feri = h5py.File(erifile, 'a')
        if dataname in feri:
            del(feri[dataname])
    else:
        feri = h5py.File(erifile, 'w')
    if comp == 1:
        chunks = (min(int(16e3/nmoj),naoaux), nmoj) # 128K
        h5d_eri = feri.create_dataset(dataname, (naoaux,nij_pair), 'f8',
                                      chunks=chunks)
        aopairblks = len(fswap[dataname])
    else:
        chunks = (1, min(int(16e3/nmoj),naoaux), nmoj) # 128K
        h5d_eri = feri.create_dataset(dataname, (comp,naoaux,nij_pair), 'f8',
                                      chunks=chunks)
        aopairblks = len(fswap[dataname+'/0'])
    if comp > 1:
        for icomp in range(comp):
            feri.create_group(str(icomp)) # for h5py old version

    # At least one row per block, so the block step and the step count
    # below never divide by (or iterate with) zero.
    iolen = max(1, min(int(ioblk_size*1e6/8/(nao_pair+nij_pair)), naoaux))
    totstep = (naoaux+iolen-1)//iolen * comp
    buf = numpy.empty((iolen, nao_pair))
    istep = 0
    ti0 = time1
    for icomp in range(comp):
        for row0, row1 in prange(0, naoaux, iolen):
            nrow = row1 - row0
            istep += 1

            log.debug('step 2 [%d/%d], [%d,%d:%d], row = %d',
                      istep, totstep, icomp, row0, row1, nrow)
            # Gather the column blocks of the swap file into one row buffer.
            col0 = 0
            for ic in range(aopairblks):
                if comp == 1:
                    dat = fswap['%s/%d'%(dataname,ic)]
                else:
                    dat = fswap['%s/%d/%d'%(dataname,icomp,ic)]
                col1 = col0 + dat.shape[1]
                buf[:nrow,col0:col1] = dat[row0:row1]
                col0 = col1

            buf1 = _ao2mo.nr_e2_(buf[:nrow], moij, ijshape,
                                 aosym_as_nr_e2, ijmosym)
            if comp == 1:
                h5d_eri[row0:row1] = buf1
            else:
                h5d_eri[icomp,row0:row1] = buf1

            ti0 = log.timer('step 2 [%d/%d], [%d,%d:%d], row = %d'%
                            (istep, totstep, icomp, row0, row1, nrow), *ti0)

    fswap.close()
    feri.close()
    log.timer('AO->MO CD eri transformation 2 pass', *time1)
    log.timer('AO->MO CD eri transformation', *time0)
    return erifile
def general(mol, mo_coeffs, erifile, dataname='eri_mo', tmpdir=None,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=256, verbose=logger.WARN, compact=True):
    r'''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on the fly.

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs:
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By assigning
            different dataname, the existing integral file can be reused.  If
            erifile is a file name and the file already contains the dataname,
            the new integrals will overwrite the old ones.
        tmpdir : str
            The directory where to temporarily store the intermediate data
            (the half-transformed integrals).  By default, it's controlled by
            shell environment variable ``TMPDIR``.  The disk space requirement
            is about  comp*mo_coeffs[0].shape[1]*mo_coeffs[1].shape[1]*nao**2
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        The ``erifile`` argument, unchanged.  The integrals themselves are
        written to the dataset ``dataname`` inside it.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf import ao2mo
    >>> import h5py
    >>> def view(h5file, dataname='eri_mo'):
    ...     f5 = h5py.File(h5file)
    ...     print('dataset %s, shape %s' % (str(f5.keys()), str(f5[dataname].shape)))
    ...     f5.close()
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo4), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 24)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 21)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5', compact=False)
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo2,mo2), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (55, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', dataname='new')
    >>> view('oh2.h5', 'new')
    dataset ['eri_mo', 'new'], shape (55, 55)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='cint2e_ip1_sph', aosym='s1', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 100)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='cint2e_ip1_sph', aosym='s2kl', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 55)
    '''
    time_0pass = (time.clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    ijsame = compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1])
    klsame = compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3])

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2kl'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    if compact and ijsame and aosym in ('s4', 's2ij'):
        nij_pair = nmoi*(nmoi+1) // 2
    else:
        nij_pair = nmoi*nmoj

    if compact and klsame and aosym in ('s4', 's2kl'):
        log.debug('k-mo == l-mo')
        klmosym = 's2'
        nkl_pair = nmok*(nmok+1) // 2
        mokl = numpy.asarray(mo_coeffs[2], order='F')
        klshape = (0, nmok, 0, nmok)
    else:
        klmosym = 's1'
        nkl_pair = nmok*nmol
        mokl = numpy.asarray(numpy.hstack((mo_coeffs[2],mo_coeffs[3])),
                             order='F')
        klshape = (0, nmok, nmok, nmol)

#    if nij_pair > nkl_pair:
#        log.warn('low efficiency for AO to MO trans!')

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            # Open explicitly for update: the implicit default mode is
            # read-only in h5py >= 3, which would make the
            # del/create_dataset below fail.
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del(feri[dataname])
        else:
            feri = h5py.File(erifile, 'w')
    else:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile
    if comp == 1:
        chunks = (nmoj,nmol)
        h5d_eri = feri.create_dataset(dataname, (nij_pair,nkl_pair),
                                      'f8', chunks=chunks)
    else:
        chunks = (1,nmoj,nmol)
        h5d_eri = feri.create_dataset(dataname, (comp,nij_pair,nkl_pair),
                                      'f8', chunks=chunks)

    if nij_pair == 0 or nkl_pair == 0:
        # Nothing to transform; only close files this function opened.
        if isinstance(erifile, str):
            feri.close()
        return erifile
    log.debug('MO integrals %s are saved in %s/%s', intor, erifile, dataname)
    log.debug('num. MO ints = %.8g, required disk %.8g MB',
              float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*8/1e6)

# transform e1
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    fswap = h5py.File(swapfile.name, 'w')
    half_e1(mol, mo_coeffs, fswap, intor, aosym, comp, max_memory, ioblk_size,
            log, compact)

    time_1pass = log.timer('AO->MO transformation for %s 1 pass'%intor,
                           *time_0pass)

    iobuflen = guess_e2bufsize(ioblk_size, nij_pair, nao_pair)[0]
    log.debug('step2: kl-pair (ao %d, mo %d), mem %.8g MB, ioblock %.8g MB',
              nao_pair, nkl_pair, iobuflen*nao_pair*8/1e6,
              iobuflen*nkl_pair*8/1e6)

    klaoblks = len(fswap['0'])
    ijmoblks = int(numpy.ceil(float(nij_pair)/iobuflen)) * comp
    ao_loc = numpy.asarray(mol.ao_loc_nr(), dtype=numpy.int32)
    ti0 = time_1pass
    bufs1 = numpy.empty((iobuflen,nkl_pair))
    buf = numpy.empty((iobuflen, nao_pair))
    istep = 0
    for row0, row1 in prange(0, nij_pair, iobuflen):
        nrow = row1 - row0

        for icomp in range(comp):
            istep += 1
            tioi = 0  # wall time spent on I/O within this step
            log.debug('step 2 [%d/%d], [%d,%d:%d], row = %d',
                      istep, ijmoblks, icomp, row0, row1, nrow)

            buf = _load_from_h5g(fswap['%d'%icomp], row0, row1, buf)
            ti2 = log.timer('step 2 [%d/%d], load buf'%(istep,ijmoblks), *ti0)
            tioi += ti2[1]-ti0[1]
            pbuf = bufs1[:nrow]
            _ao2mo.nr_e2_(buf[:nrow], mokl, klshape, aosym, klmosym,
                          ao_loc=ao_loc, out=pbuf)

            tw1 = time.time()
            if comp == 1:
                h5d_eri[row0:row1] = pbuf
            else:
                h5d_eri[icomp,row0:row1] = pbuf
            tioi += time.time()-tw1

            ti1 = (time.clock(), time.time())
            log.debug('step 2 [%d/%d] CPU time: %9.2f, Wall time: %9.2f, I/O time: %9.2f',
                      istep, ijmoblks, ti1[0]-ti0[0], ti1[1]-ti0[1], tioi)
            ti0 = ti1
    fswap.close()
    if isinstance(erifile, str):
        feri.close()

    log.timer('AO->MO transformation for %s 2 pass'%intor, *time_1pass)
    log.timer('AO->MO transformation for %s '%intor, *time_0pass)
    return erifile
def trans_e1_outcore(mol, mo, ncore, ncas, erifile, max_memory=None, level=1, verbose=logger.WARN):
    '''Generate the papa and ppaa integral blocks on disk, plus j_pc and k_pc.

    AO integrals are produced shell-range by shell-range with
    ``_ao2mo.nr_e1fill_``, half-transformed, and accumulated.  A second pass
    finishes the transformation and writes two datasets to ``erifile``:
    ``"papa"`` with shape ``(nmo, ncas, nmo, ncas)`` and ``"ppaa"`` with
    shape ``(nmo, nmo, ncas, ncas)``.

    Args:
        mol : object providing ``stdout``, ``_atm``, ``_bas``, ``_env`` and
            ``ao_loc_nr()``.
        mo : 2D ndarray of orbital coefficients, shape ``(nao, nmo)``.
        ncore : int
            Offset of the first active orbital.
        ncas : int
            Number of active orbitals.
        erifile : str
            Path of the output HDF5 file; it is created/truncated.

    Kwargs:
        max_memory : float or int
            Memory budget in MB.  ``None`` is treated as 2000, which is the
            lower bound the buffer-size computation below applies anyway.
        level : int
            j_pc and k_pc are only accumulated when ``level == 1``;
            otherwise they are returned as zeros.
        verbose : int or logger.Logger

    Returns:
        Tuple ``(j_pc, k_pc)``, both of shape ``(nmo, ncore)``.
    '''
    time0 = (time.clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    if max_memory is None:
        # The default would otherwise fail in the arithmetic below.
        max_memory = 2000
    log.debug1("trans_e1_outcore level %d max_memory %d", level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao * (nao + 1) // 2
    nocc = ncore + ncas

    _tmpfile1 = tempfile.NamedTemporaryFile()
    # Explicit write mode: this scratch file is written to below, and the
    # implicit default mode is read-only in h5py >= 3.
    faapp_buf = h5py.File(_tmpfile1.name, "w")
    feri = h5py.File(erifile, "w")

    mo_c = numpy.asarray(mo, order="C")
    mo = numpy.asarray(mo, order="F")

    pashape = (0, nmo, ncore, ncas)
    papa_buf = numpy.zeros((nao, ncas, nmo * ncas))
    j_pc = numpy.zeros((nmo, ncore))
    k_pc = numpy.zeros((nmo, ncore))

    mem_words = int(max(2000, max_memory - papa_buf.nbytes / 1e6) * 1e6 / 8)
    aobuflen = mem_words // (nao_pair + nocc * nmo) + 1
    shranges = outcore.guess_shell_ranges(mol, aobuflen, aobuflen, "s4")
    ao2mopt = _ao2mo.AO2MOpt(mol, "cint2e_sph", "CVHFnr_schwarz_cond", "CVHFsetnr_direct_scf")
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    log.debug("mem cache %.8g MB", mem_words * 8 / 1e6)
    ti0 = log.timer("Initializing trans_e1_outcore", *time0)
    nstep = len(shranges)
    maxbuflen = max([x[2] for x in shranges])
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, nmo * ncas))
    bufs3 = numpy.empty((maxbuflen, nao * ncore))

    # fmmm, ftrans, fdrv for level 1
    fmmm = _fpointer("MCSCFhalfmmm_nr_s2_ket")
    ftrans = _fpointer("AO2MOtranse1_nr_s4")
    fdrv = getattr(libmcscf, "AO2MOnr_e2_drv")
    for istep, sh_range in enumerate(shranges):
        log.debug("[%d/%d], AO [%d:%d], len(buf) = %d", istep + 1, nstep, *(sh_range[:3]))
        buf = bufs1[: sh_range[2]]
        _ao2mo.nr_e1fill_("cint2e_sph", sh_range[:3], mol._atm, mol._bas, mol._env, "s4", 1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer("AO integrals buffer", *ti0)
        bufpa = bufs2[: sh_range[2]]
        _ao2mo.nr_e1_(buf, mo, pashape, "s4", "s1", vout=bufpa)
        # jc_pp, kc_pp
        if level == 1:  # ppaa, papa and vhf, jcp, kcp
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer("buffer-pa", *ti1)
            buf1 = bufs3[: sh_range[2]]
            fdrv(
                ftrans,
                fmmm,
                buf1.ctypes.data_as(ctypes.c_void_p),
                buf.ctypes.data_as(ctypes.c_void_p),
                mo.ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(sh_range[2]),
                ctypes.c_int(nao),
                ctypes.c_int(0),
                ctypes.c_int(nao),
                ctypes.c_int(0),
                ctypes.c_int(ncore),
                ctypes.POINTER(ctypes.c_void_p)(),
                ctypes.c_int(0),
            )
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i + 1]
                j1 = ao_loc[j + 1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows are stored lower-triangular,
                    # so mirror them into a full (di, di) block first.
                    dij = di * (di + 1) // 2
                    buf = numpy.empty((di, di, nao * ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0 : p0 + dij]
                    buf[idx[1], idx[0]] = buf1[p0 : p0 + dij]
                    buf = buf.reshape(di, di, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    tmp = numpy.einsum("uvpc,pc->uvc", buf, mo[:, :ncore])
                    tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum("vp,pvc->pc", mo1, tmp.reshape(nmo, di, ncore))
                    tmp = numpy.einsum("uvpc,uc->vcp", buf, mo1[:, :ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1, nmo), mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum("vp,vcp->pc", mo1, tmp)
                else:
                    # Off-diagonal shell pair: the (i, j) and (j, i)
                    # contributions are both added here.
                    dij = di * dj
                    buf = buf1[p0 : p0 + dij].reshape(di, dj, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    mo2 = mo_c[j0:j1]
                    tmp = numpy.einsum("uvpc,pc->uvc", buf, mo[:, :ncore])
                    tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum("vp,pvc->pc", mo2, tmp.reshape(nmo, dj, ncore)) * 2
                    tmp = numpy.einsum("uvpc,uc->vcp", buf, mo1[:, :ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1, nmo), mo).reshape(dj, ncore, nmo)
                    k_pc += numpy.einsum("vp,vcp->pc", mo2, tmp)
                    tmp = numpy.einsum("uvpc,vc->ucp", buf, mo2[:, :ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1, nmo), mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum("up,ucp->pc", mo1, tmp)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer("j_cp and k_cp", *ti1)

        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer("half transformation of the buffer", *ti1)

        # ppaa, papa
        faapp_buf[str(istep)] = bufpa.reshape(sh_range[2], nmo, ncas)[:, ncore:nocc].reshape(-1, ncas ** 2).T
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i + 1]
            j1 = ao_loc[j + 1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di + 1) // 2
                buf1 = numpy.empty((di, di, nmo * ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0 : p0 + dij]
                buf1[idx[1], idx[0]] = bufpa[p0 : p0 + dij]
            else:
                dij = di * dj
                buf1 = bufpa[p0 : p0 + dij].reshape(di, dj, -1)
                mo1 = mo[j0:j1, ncore:nocc].copy()
                # NOTE: this loop rebinds `i` (the shell index above); the
                # shell index is not read again in this iteration.
                for i in range(di):
                    pyscf.lib.dot(mo1.T, buf1[i], 1, papa_buf[i0 + i], 1)
            mo1 = mo[i0:i1, ncore:nocc].copy()
            buf1 = pyscf.lib.dot(mo1.T, buf1.reshape(di, -1))
            papa_buf[j0:j1] += buf1.reshape(ncas, dj, -1).transpose(1, 0, 2)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer("ppaa and papa buffer", *ti1)

        ti0 = log.timer("gen AO/transform MO [%d/%d]" % (istep + 1, nstep), *ti0)
    # Release the large work buffers before the second pass allocates.
    buf = buf1 = bufs1 = bufs2 = bufs3 = bufpa = None
    time1 = log.timer("mc_ao2mo pass 1", *time0)

    log.debug1("Half transformation done. Current memory %d", pyscf.lib.current_memory()[0])

    nblk = int(max(8, min(nmo, max(2000, max_memory * 1e6 / 8 - papa_buf.size) / (ncas ** 2 * nmo))))
    log.debug1("nblk for papa = %d", nblk)
    dset = feri.create_dataset("papa", (nmo, ncas, nmo, ncas), "f8")
    for i0, i1 in prange(0, nmo, nblk):
        tmp = pyscf.lib.dot(mo[:, i0:i1].T, papa_buf.reshape(nao, -1))
        dset[i0:i1] = tmp.reshape(i1 - i0, ncas, nmo, ncas)
    papa_buf = tmp = None
    time1 = log.timer("papa pass 2", *time1)

    # Reassemble the per-step (active, active | AO pair) slabs from the
    # scratch file into one array.
    tmp = numpy.empty((ncas ** 2, nao_pair))
    p0 = 0
    for istep, sh_range in enumerate(shranges):
        tmp[:, p0 : p0 + sh_range[2]] = faapp_buf[str(istep)]
        p0 += sh_range[2]
    nblk = int(max(8, min(nmo, max(2000, max_memory * 1e6 / 8 - tmp.size) / (ncas ** 2 * nmo) - 1)))
    log.debug1("nblk for ppaa = %d", nblk)
    dset = feri.create_dataset("ppaa", (nmo, nmo, ncas, ncas), "f8")
    for i0, i1 in prange(0, nmo, nblk):
        tmp1 = _ao2mo.nr_e2_(tmp, mo, (i0, i1 - i0, 0, nmo), "s4", "s1", ao_loc=ao_loc)
        tmp1 = tmp1.reshape(ncas, ncas, i1 - i0, nmo)
        for j in range(i1 - i0):
            dset[i0 + j] = tmp1[:, :, j].transpose(2, 0, 1)
    tmp = tmp1 = None
    time1 = log.timer("ppaa pass 2", *time1)

    faapp_buf.close()
    feri.close()
    _tmpfile1 = None
    time0 = log.timer("mc_ao2mo", *time0)
    return j_pc, k_pc
def general(eri_ao, mo_coeffs, verbose=0, compact=True):
    r'''For the given four sets of orbitals, transfer the 8-fold or 4-fold 2e
    AO integrals to MO integrals.

    Args:
        eri_ao : ndarray
            AO integrals, can be either 8-fold or 4-fold symmetry.
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)

    Kwargs:
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        2D array of transformed MO integrals.  The MO integrals may or may not
        have the permutation symmetry, depending on the given orbitals, and
        the kwargs compact.  If the four sets of orbitals are identical, the
        MO integrals will at most have 4-fold symmetry.  When either pair
        dimension is zero, a zero-filled array of that shape is returned.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf.scf import _vhf
    >>> from pyscf import ao2mo
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> eri = _vhf.int2e_sph(mol._atm, mol._bas, mol._env)
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo4))
    >>> print(eri1.shape)
    (80, 24)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo3))
    >>> print(eri1.shape)
    (80, 21)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo3,mo3), compact=False)
    >>> print(eri1.shape)
    (80, 36)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo1,mo2,mo2))
    >>> print(eri1.shape)
    (55, 36)
    >>> eri1 = ao2mo.incore.general(eri, (mo1,mo2,mo1,mo2))
    >>> print(eri1.shape)
    (80, 80)
    '''
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(sys.stdout, verbose)

    ijsame = compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1])
    klsame = compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3])

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]

    nao = mo_coeffs[0].shape[0]
    nao_pair = nao*(nao+1)//2
    assert(eri_ao.size in (nao_pair**2, nao_pair*(nao_pair+1)//2))

    if compact and ijsame:
        nij_pair = nmoi*(nmoi+1) // 2
    else:
        nij_pair = nmoi*nmoj

    # numpy.asarray(..., order='F') copies only when needed.  The former
    # numpy.array(..., copy=False) raises in NumPy >= 2 whenever a copy is
    # required, which is the usual case for C-ordered coefficients and
    # always the case for the hstack result.
    if compact and klsame:
        klmosym = 's2'
        nkl_pair = nmok*(nmok+1) // 2
        mokl = numpy.asarray(mo_coeffs[2], order='F')
        klshape = (0, nmok, 0, nmok)
    else:
        klmosym = 's1'
        nkl_pair = nmok*nmol
        mokl = numpy.asarray(numpy.hstack((mo_coeffs[2],mo_coeffs[3])),
                             order='F')
        klshape = (0, nmok, nmok, nmol)

    if nij_pair == 0 or nkl_pair == 0:
        # 0 dimension sometimes causes blas problem
        return numpy.zeros((nij_pair,nkl_pair))

#    if nij_pair > nkl_pair:
#        log.warn('low efficiency for AO to MO trans!')

# transform e1
    eri1 = half_e1(eri_ao, mo_coeffs, compact)

# transform e2
    eri1 = _ao2mo.nr_e2_(eri1, mokl, klshape, aosym='s4', mosym=klmosym)
    return eri1
def light_e1_outcore(mol, mo, ncore, ncas,
                     max_memory=None, approx=1, verbose=logger.WARN):
    '''Build the aapp and appa integral blocks (and, depending on approx,
    core-orbital J/K quantities) from AO integrals generated batch by batch.

    AO integrals are produced one shell-pair range at a time with
    ``_ao2mo.nr_e1fill_`` and immediately half-transformed, so only one
    batch of AO integrals is held at once.

    Args:
        mol : object describing the molecule.  This function reads
            ``stdout``, ``_atm``, ``_bas``, ``_env``, ``nbas`` and
            ``ao_loc_nr()`` from it and passes it to
            ``outcore.guess_shell_ranges`` and ``_ao2mo.AO2MOpt``.
        mo : 2D array, shape (nao, nmo)
            Orbital coefficients.  Columns [:ncore] are used as core
            orbitals and [ncore:ncore+ncas] as active orbitals.
        ncore : int
            Number of core orbitals.
        ncas : int
            Number of active orbitals.

    Kwargs:
        max_memory : float or None
            Memory budget in MB (array sizes are subtracted as
            ``nbytes / 1e6``).  None is treated as 0.  Whatever remains
            after the fixed buffers is floored at 1000 before the AO batch
            length is derived from it.
        approx : int
            1: also accumulate per-core-orbital J and K, returning vhf_c,
               j_cp and k_cp.
            2: accumulate J and K for the core density
               ``2 * mo_core * mo_core^T`` through
               ``libmcscf.MCSCFnrs4_aapp_jk``; returns vhf_c only.
            any other value: compute aapp and appa only.
        verbose : int or logger.Logger
            Print level, or an existing Logger to report to.

    Returns:
        (vhf_c, j_cp, k_cp, aapp, appa)

        vhf_c : (nmo, nmo) array ``mo^T V mo`` or None.  V is
            ``2*sum_c jc - sum_c kc`` for approx=1 and ``jc - kc/2`` for
            approx=2.
        j_cp, k_cp : (ncore, nmo) arrays for approx=1, otherwise None.
        aapp : (ncas, ncas, nmo, nmo) array.
        appa : (ncas, nmo, nmo, ncas) array.
    '''
    # time.clock was removed in Python 3.8; keep using it where it exists so
    # old interpreters behave exactly as before, else use process_time.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    # The declared default is None, which cannot be decremented below.
    # Treat it as "no explicit budget": the 1000 MB floor then applies.
    if max_memory is None:
        max_memory = 0

    mo = numpy.asarray(mo, order='F')
    nao, nmo = mo.shape
    nao_pair = nao*(nao+1)//2
    nocc = ncore + ncas
    pashape = (0, nmo, ncore, ncas)

    aapp_buf = numpy.empty((nao_pair,ncas,ncas))
    appa_buf = numpy.zeros((ncas,nao,nmo*ncas))
    max_memory -= (aapp_buf.nbytes+appa_buf.nbytes) / 1e6

    if approx == 1:
        # one J/K matrix per core orbital
        jc = numpy.empty((nao,nao,ncore))
        kc = numpy.zeros((nao,nao,ncore))
    else:
        dm_core = numpy.dot(mo[:,:ncore], mo[:,:ncore].T) * 2
        jc = numpy.zeros((nao,nao))
        kc = numpy.zeros((nao,nao))
    max_memory -= (jc.nbytes+kc.nbytes) / 1e6

    mem_words = int(max(1000,max_memory)*1e6/8)
    aobuflen = mem_words//(nao_pair+nocc*nmo) + 1
    shranges = outcore.guess_shell_ranges(mol, aobuflen, aobuflen, 's4')
    ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph',
                             'CVHFnr_schwarz_cond', 'CVHFsetnr_direct_scf')
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    log.debug('mem cache %.8g MB', mem_words*8/1e6)
    ti0 = log.timer('Initializing light_e1_outcore', *time0)
    nstep = len(shranges)
    paapp = 0
    # Work buffers are allocated once, sized for the largest batch, and
    # sliced per batch inside the loop.
    maxbuflen = max(x[2] for x in shranges)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, pashape[1]*pashape[3]))
    bufs3 = numpy.empty((maxbuflen, nao*ncore))
    for istep, sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d',
                  istep+1, nstep, *(sh_range[:3]))
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill_('cint2e_sph', sh_range[:3],
                          mol._atm, mol._bas, mol._env, 's4', 1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        # jc_pp, kc_pp
        if approx == 1:  # aapp, appa and vhf, jcp, kcp
            _ao2mo.nr_e1_(buf, mo, pashape, 's4', 's1', vout=bufpa)
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fmmm = _fpointer('MCSCFhalfmmm_nr_s2_ket')
            ftrans = _fpointer('AO2MOtranse1_nr_s4')
            fdrv = getattr(libmcscf, 'AO2MOnr_e2_drv')
            fdrv(ftrans, fmmm,
                 buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[2]), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i+1]
                j1 = ao_loc[j+1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows hold the lower triangle
                    # only, so unpack into a full symmetric (di, di) block.
                    dij = di * (di+1) // 2
                    blk = numpy.empty((di,di,nao*ncore))
                    idx = numpy.tril_indices(di)
                    blk[idx] = buf1[p0:p0+dij]
                    blk[idx[1],idx[0]] = buf1[p0:p0+dij]
                    blk = blk.reshape(di,di,nao,ncore)
                    jc[i0:i1,j0:j1] = numpy.einsum('uvpc,pc->uvc', blk,
                                                   mo[:,:ncore])
                    kc[j0:j1] += numpy.einsum('uvpc,uc->vpc', blk,
                                              mo[i0:i1,:ncore])
                else:
                    dij = di * dj
                    blk = buf1[p0:p0+dij].reshape(di,dj,nao,ncore)
                    jc[i0:i1,j0:j1] = numpy.einsum('uvpc,pc->uvc', blk,
                                                   mo[:,:ncore])
                    jc[j0:j1,i0:i1] = jc[i0:i1,j0:j1].transpose(1,0,2)
                    kc[j0:j1] += numpy.einsum('uvpc,uc->vpc', blk,
                                              mo[i0:i1,:ncore])
                    kc[i0:i1] += numpy.einsum('uvpc,vc->upc', blk,
                                              mo[j0:j1,:ncore])
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('jc and kc buffer', *ti1)
        elif approx == 2:  # aapp, appa, vhf
            fdrv = libmcscf.MCSCFnrs4_aapp_jk
            fdrv(buf.ctypes.data_as(ctypes.c_void_p),
                 bufpa.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 dm_core.ctypes.data_as(ctypes.c_void_p),
                 jc.ctypes.data_as(ctypes.c_void_p),
                 kc.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[0]),
                 ctypes.c_int(sh_range[1]-sh_range[0]),
                 ctypes.c_int(pashape[0]), ctypes.c_int(pashape[1]),
                 ctypes.c_int(pashape[2]), ctypes.c_int(pashape[3]),
                 ao_loc.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(mol.nbas))
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('bufpa + core vj and vk', *ti1)
        else:  # aapp, appa
            _ao2mo.nr_e1_(buf, mo, pashape, 's4', 's1', vout=bufpa)

        # aapp, appa
        aapp_buf[paapp:paapp+sh_range[2]] = \
                bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc]
        paapp += sh_range[2]
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i+1]
            j1 = ao_loc[j+1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                # Diagonal shell pair: unpack the lower triangle into a full
                # symmetric block; one contraction below covers both orders.
                dij = di * (di+1) // 2
                blk = numpy.empty((di,di,nmo*ncas))
                idx = numpy.tril_indices(di)
                blk[idx] = bufpa[p0:p0+dij]
                blk[idx[1],idx[0]] = bufpa[p0:p0+dij]
                blk = blk.reshape(di,-1)
            else:
                # Off-diagonal pair: contract over the second AO index
                # here, then over the first AO index after the branch.
                dij = di * dj
                blk = bufpa[p0:p0+dij].reshape(di,dj,-1)
                mo1 = mo[j0:j1,ncore:nocc].copy()
                for row in range(di):
                    appa_buf[:,i0+row] += pyscf.lib.dot(mo1.T, blk[row])
                blk = bufpa[p0:p0+dij].reshape(di,-1)
            mo1 = mo[i0:i1,ncore:nocc].copy()
            appa_buf[:,j0:j1] += pyscf.lib.dot(mo1.T, blk).reshape(ncas,dj,-1)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('aapp and appa buffer', *ti1)
        ti0 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)
    # drop the batch buffers before the final transformations
    bufs1 = bufs2 = bufs3 = None

    aapp_buf = pyscf.lib.transpose(aapp_buf.reshape(nao_pair,-1))
    aapp = _ao2mo.nr_e2_(aapp_buf, mo, (0,nmo,0,nmo), 's4', 's1',
                         ao_loc=ao_loc)
    aapp = aapp.reshape(ncas,ncas,nmo,nmo)
    aapp_buf = None

    # Transform the remaining AO index of appa.  The accumulation buffer is
    # reused in place when the result has the same shape (nao == nmo).
    # Otherwise the output needs nmo rows per active orbital; it was
    # previously allocated with nao rows and failed for nao != nmo.
    if nao == nmo:
        appa = appa_buf
    else:
        appa = numpy.empty((ncas,nmo,nmo*ncas))
    for i in range(ncas):
        appa[i] = numpy.dot(mo.T, appa_buf[i].reshape(nao,-1))
    appa = appa.reshape(ncas,nmo,nmo,ncas)
    appa_buf = None

    if approx == 1:
        vhf_c = numpy.einsum('ijc->ij', jc)*2 - numpy.einsum('ijc->ij', kc)
        vhf_c = numpy.dot(numpy.dot(mo.T, vhf_c), mo)
        # mo.T contracts the first AO index, so the leading dimension of the
        # product is nmo (not nao).
        j_cp = numpy.dot(mo.T, jc.reshape(nao,-1)).reshape(nmo,nao,ncore)
        j_cp = numpy.einsum('pj,jpi->ij', mo, j_cp)
        k_cp = numpy.dot(mo.T, kc.reshape(nao,-1)).reshape(nmo,nao,ncore)
        k_cp = numpy.einsum('pj,jpi->ij', mo, k_cp)
    elif approx == 2:
        jc = pyscf.lib.hermi_triu(jc, hermi=1, inplace=True)
        kc = pyscf.lib.hermi_triu(kc, hermi=1, inplace=True)
        vhf_c = numpy.dot(numpy.dot(mo.T, jc-kc*.5), mo)
        j_cp = k_cp = None
    else:
        # Mirrors the fall-through branch of the batch loop: no core J/K
        # was accumulated, so there is nothing to return for it.
        vhf_c = j_cp = k_cp = None
    log.timer('mc_ao2mo', *time0)
    return vhf_c, j_cp, k_cp, aapp, appa