def ao2mo(self, mo_coeff):
    """Build the integral object and attach density-fitted ``j_pc``/``k_pc``.

    The integral object itself comes from ``casscf_class.ao2mo``.  This
    method only adds two arrays to it, accumulated block by block from
    ``self.with_df.loop()``.

    Args:
        mo_coeff: 2D orbital coefficients; converted to Fortran order and
            read as shape ``(nao, nmo)``.

    Returns:
        The object returned by ``casscf_class.ao2mo`` with ``j_pc`` and
        ``k_pc`` (both of shape ``(nmo, ncore)``) set on it.
    """
    # the exact integral transformation
    eris = casscf_class.ao2mo(self, mo_coeff)

    log = logger.Logger(self.stdout, self.verbose)
    # Add the approximate diagonal term for orbital hessian
    t_total = (time.clock(), time.time())
    t_step = t_total
    mo = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo.shape
    ncore = self.ncore
    eris.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))

    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    # One scratch array reused for every block; sliced to the block length.
    scratch = numpy.empty((self.with_df.blockdim, nmo, nmo))
    for eri1 in self.with_df.loop():
        naux = eri1.shape[0]
        buf = scratch[:naux]
        # NOTE(review): presumably fills buf[k] with the k-th auxiliary slice
        # expressed in the orbital basis -- confirm against the C driver.
        fdrv(ftrans, fmmm,
             buf.ctypes.data_as(ctypes.c_void_p),
             eri1.ctypes.data_as(ctypes.c_void_p),
             mo.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(naux), ctypes.c_int(nao),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_void_p(0), ctypes.c_int(0))
        # diag[k, i] = buf[k, i, i]
        diag = numpy.einsum('kii->ki', buf)
        eris.j_pc += numpy.einsum('ki,kj->ij', diag, diag[:, :ncore])
        # k_cp[c, p] accumulates sum_k buf[k, c, p]**2 over the first ncore rows
        k_cp += numpy.einsum('kij,kij->ij', buf[:, :ncore], buf[:, :ncore])
        t_step = log.timer_debug1('j_pc and k_pc', *t_step)
    eris.k_pc = k_cp.T.copy()
    log.timer('ao2mo density fit part', *t_total)
    return eris
def get_h2eff(self, mo_coeff=None):  # For CASCI
    """Return two-electron integrals for the given orbitals.

    When ``self.with_df`` is falsy the call is delegated to
    ``casscf_class.get_h2eff``.  Otherwise every block yielded by
    ``self.with_df.loop()`` is processed by the C driver into one row range
    of a ``(naoaux, nmo*(nmo+1)//2)`` buffer ``B``, and ``B.T . B`` is
    returned.

    Args:
        mo_coeff: 2D orbital coefficients read as shape ``(nao, nmo)``.
            NOTE(review): the default ``None`` is not handled in the
            density-fitting branch (``mo.shape`` would not unpack) --
            confirm callers always pass an array.

    Returns:
        A square array with ``nmo*(nmo+1)//2`` rows and columns in the
        density-fitting branch; otherwise whatever
        ``casscf_class.get_h2eff`` returns.
    """
    if not self.with_df:
        return casscf_class.get_h2eff(self, mo_coeff)

    mo = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo.shape
    naoaux = self.with_df.get_naoaux()
    packed = numpy.empty((naoaux, nmo*(nmo+1)//2))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_s2')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    row = 0
    for eri1 in self.with_df.loop():
        naux = eri1.shape[0]
        # Each block writes directly into its own row range of `packed`.
        target = packed[row:row+naux]
        fdrv(ftrans, fmmm,
             target.ctypes.data_as(ctypes.c_void_p),
             eri1.ctypes.data_as(ctypes.c_void_p),
             mo.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(naux), ctypes.c_int(nao),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_void_p(0), ctypes.c_int(0))
        row += naux
    return pyscf.lib.dot(packed.T, packed)
def ao2mo_(casscf, mo):
    """Build the integral object for ``casscf``, using density fitting.

    ``dfhf.get_jk_`` is called first with an empty density-matrix list so
    that ``casscf._cderi`` is generated.  If ``casscf._scf`` carries a truthy
    ``_tag_df`` the density-fitted ``_ERIS`` is returned directly.  Otherwise
    the 4-center ``mc_ao2mo._ERIS`` is built and only ``j_pc``/``k_pc`` are
    accumulated from the density-fitting tensor.

    Args:
        casscf: CASSCF-like object providing ``stdout``, ``verbose``, ``mol``,
            ``ncore``, ``_scf``, ``_cderi`` and ``_naoaux``.
        mo: 2D orbital coefficients read as shape ``(nao, nmo)``.

    Returns:
        The integral object; in the non-``_tag_df`` branch it carries
        ``j_pc`` and ``k_pc`` of shape ``(nmo, ncore)``.
    """
    t0 = (time.clock(), time.time())
    log = logger.Logger(casscf.stdout, casscf.verbose)
    # using dm=[], a hacky call to dfhf.get_jk, to generate casscf._cderi
    dfhf.get_jk_(casscf, casscf.mol, [])
    if log.verbose >= logger.DEBUG1:
        t1 = log.timer('Generate density fitting integrals', *t0)

    if hasattr(casscf._scf, '_tag_df') and casscf._scf._tag_df:
        return _ERIS(casscf, mo)

    # Only approximate the orbital rotation, call the 4-center integral
    # transformation.  CASSCF is exact.
    eris = mc_ao2mo._ERIS(casscf, mo, 'incore', level=2)

    t0 = (time.clock(), time.time())
    mo = numpy.asarray(mo, order='F')
    nao, nmo = mo.shape
    ncore = casscf.ncore
    eris.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    scratch = numpy.empty((dfhf.BLOCKDIM, nmo, nmo))
    with df.load(casscf._cderi) as feri:
        for b0, b1 in dfhf.prange(0, casscf._naoaux, dfhf.BLOCKDIM):
            nrow = b1 - b0
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            buf = scratch[:nrow]
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
            fdrv(ftrans, fmmm,
                 buf.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(nrow), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('transform [%d:%d]' % (b0, b1), *t1)
            # diag[k, i] = buf[k, i, i]
            diag = numpy.einsum('kii->ki', buf)
            eris.j_pc += numpy.einsum('ki,kj->ij', diag, diag[:, :ncore])
            k_cp += numpy.einsum('kij,kij->ij',
                                 buf[:, :ncore], buf[:, :ncore])
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('j_pc and k_pc', *t1)
            # Drop the reference to the block before the next one is loaded.
            eri1 = None
    eris.k_pc = k_cp.T.copy()
    log.timer('ao2mo density fit part', *t0)
    return eris
def ao2mo(self, mo):
    """Build 4-center integrals and attach density-fitted ``j_pc``/``k_pc``.

    ``self.get_jk`` is called with an empty density-matrix list so that
    ``self._cderi`` is generated.  The tensor is then read in blocks of
    ``dfhf.BLOCKDIM`` rows and two arrays are accumulated from it.

    Each block is loaded with ``numpy.asarray(..., order='C')`` so that the
    raw pointer handed to the C driver always refers to C-contiguous memory.

    Args:
        mo: 2D orbital coefficients read as shape ``(nao, nmo)``.

    Returns:
        The ``mc_ao2mo._ERIS`` object with ``j_pc`` and ``k_pc`` (both of
        shape ``(nmo, ncore)``) set on it.
    """
    t0 = (time.clock(), time.time())
    ncore = self.ncore
    log = pyscf.lib.logger.Logger(self.stdout, self.verbose)
    # using dm=[], a hacky call to dfhf.get_jk, to generate self._cderi
    self.get_jk(self.mol, [])
    if log.verbose >= pyscf.lib.logger.DEBUG1:
        t1 = log.timer('Generate density fitting integrals', *t0)

    eris = mc_ao2mo._ERIS(self, mo, 'incore', level=2)

    t0 = (time.clock(), time.time())
    mo = numpy.asarray(mo, order='F')
    nao, nmo = mo.shape
    eris.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2kl')
    bufs1 = numpy.empty((dfhf.BLOCKDIM, nmo, nmo))
    with df.load(self._cderi) as feri:
        for b0, b1 in dfhf.prange(0, self._naoaux, dfhf.BLOCKDIM):
            # asarray(order='C') only copies when the slice is not already
            # C-contiguous; the C driver reads it through a bare pointer.
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            buf = bufs1[:b1-b0]
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
            fdrv(ftrans, fmmm,
                 buf.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(b1-b0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('transform [%d:%d]' % (b0, b1), *t1)
            # bufd[k, i] = buf[k, i, i]; copied into its own array
            bufd = numpy.einsum('kii->ki', buf).copy()
            #:eris.j_pc += numpy.einsum('ki,kj->ij', bufd, bufd[:,:ncore])
            pyscf.lib.dot(bufd.T, numpy.asarray(bufd[:, :ncore], order='C'),
                          1, eris.j_pc, 1)
            k_cp += numpy.einsum('kij,kij->ij',
                                 buf[:, :ncore], buf[:, :ncore])
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('j_pc and k_pc', *t1)
            # Drop the reference to the block before the next one is loaded.
            eri1 = None
    eris.k_pc = k_cp.T.copy()
    log.timer('ao2mo density fit part', *t0)
    return eris
def ao2mo(self, mo_coeff):
    """Build the integral object and attach density-fitted ``j_pc``/``k_pc``.

    The integral object comes from ``casscf_class.ao2mo``.  ``dfhf.get_jk_``
    is then called with an empty density-matrix list so that ``self._cderi``
    is generated, and the tensor is read in blocks of ``dfhf.BLOCKDIM`` rows
    to accumulate the two extra arrays.

    The per-step timer ``t1`` is initialised together with ``t0``; it was
    previously read before assignment whenever verbosity reached DEBUG1.

    Args:
        mo_coeff: 2D orbital coefficients read as shape ``(nao, nmo)``.

    Returns:
        The object returned by ``casscf_class.ao2mo`` with ``j_pc`` and
        ``k_pc`` (both of shape ``(nmo, ncore)``) set on it.
    """
    log = logger.Logger(self.stdout, self.verbose)
    # the exact integral transformation
    eris = casscf_class.ao2mo(self, mo_coeff)

    # using dm=[], a hacky call to dfhf.get_jk, to generate self._cderi
    dfhf.get_jk_(self, self.mol, [])

    # Add the approximate diagonal term for orbital hessian
    t1 = t0 = (time.clock(), time.time())
    mo = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo.shape
    ncore = self.ncore
    eris.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    bufs1 = numpy.empty((dfhf.BLOCKDIM, nmo, nmo))
    with df.load(self._cderi) as feri:
        for b0, b1 in dfhf.prange(0, self._naoaux, dfhf.BLOCKDIM):
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            buf = bufs1[:b1-b0]
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
            fdrv(ftrans, fmmm,
                 buf.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(b1-b0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('transform [%d:%d]' % (b0, b1), *t1)
            # bufd[k, i] = buf[k, i, i]
            bufd = numpy.einsum('kii->ki', buf)
            eris.j_pc += numpy.einsum('ki,kj->ij', bufd, bufd[:, :ncore])
            k_cp += numpy.einsum('kij,kij->ij',
                                 buf[:, :ncore], buf[:, :ncore])
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('j_pc and k_pc', *t1)
            # Drop the reference to the block before the next one is loaded.
            eri1 = None
    eris.k_pc = k_cp.T.copy()
    log.timer('ao2mo density fit part', *t0)
    return eris
def ao2mo(self, mo):
    """Build 4-center integrals and attach density-fitted ``j_cp``/``k_cp``.

    ``self.get_jk`` is called with an empty density-matrix list so that
    ``self._cderi`` is generated.  The tensor is then read in blocks of
    ``dfhf.BLOCKDIM`` rows and two arrays are accumulated from it.

    Each block is loaded with ``numpy.asarray(..., order='C')`` so that the
    raw pointer handed to the C driver always refers to C-contiguous memory.

    Args:
        mo: 2D orbital coefficients read as shape ``(nao, nmo)``.

    Returns:
        The ``mc_ao2mo._ERIS`` object with ``j_cp`` and ``k_cp`` (both of
        shape ``(ncore, nmo)``) set on it.
    """
    ncore = self.ncore
    #self._cderi = None # FIXME? leave as much memory as possible for mc_ao2mo
    eris = mc_ao2mo._ERIS(self, mo, 'incore', 2)
    # using dm=[], a hacky call to dfhf.get_jk, to generate self._cderi
    t0 = (time.clock(), time.time())
    log = pyscf.lib.logger.Logger(self.stdout, self.verbose)
    self.get_jk(self.mol, [])
    if log.verbose >= pyscf.lib.logger.DEBUG1:
        t1 = log.timer('Generate density fitting integrals', *t0)

    mo = numpy.asarray(mo, order='F')
    nao, nmo = mo.shape
    eris.j_cp = numpy.zeros((ncore, nmo))
    eris.k_cp = numpy.zeros((ncore, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2kl')
    with df.load(self._cderi) as feri:
        for b0, b1 in dfhf.prange(0, self._naoaux, dfhf.BLOCKDIM):
            # asarray(order='C') only copies when the slice is not already
            # C-contiguous; the C driver reads it through a bare pointer.
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            buf = numpy.empty((b1-b0, nmo, nmo))
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
            fdrv(ftrans, fmmm,
                 buf.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(b1-b0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('transform [%d:%d]' % (b0, b1), *t1)
            # bufd[k, i] = buf[k, i, i]; copied into its own array
            bufd = numpy.einsum('kii->ki', buf).copy()
            #:eris.j_cp += numpy.einsum('ki,kj->ij', bufd[:,:ncore], bufd)
            pyscf.lib.dot(bufd[:, :ncore].T.copy(), bufd, 1, eris.j_cp, 1)
            eris.k_cp += numpy.einsum('kij,kij->ij',
                                      buf[:, :ncore], buf[:, :ncore])
            if log.verbose >= pyscf.lib.logger.DEBUG1:
                t1 = log.timer('j_cp and k_cp', *t1)
    return eris
def ao2mo_aaaa(casscf, mo):
    """Return two-electron integrals for ``mo`` from the density-fitting tensor.

    ``dfhf.get_jk_`` is called with an empty density-matrix list so that
    ``casscf._cderi`` and ``casscf._naoaux`` are available.  Every block of
    ``dfhf.BLOCKDIM`` rows is processed by the C driver into the matching
    rows of a ``(naoaux, nmo*(nmo+1)//2)`` buffer ``B``; the result is
    ``B.T . B``.

    Args:
        casscf: object providing ``mol``, ``_cderi`` and ``_naoaux``.
        mo: 2D orbital coefficient array of shape ``(nao, nmo)``.

    Returns:
        A square array with ``nmo*(nmo+1)//2`` rows and columns.
    """
    dfhf.get_jk_(casscf, casscf.mol, [])
    nao, nmo = mo.shape
    packed = numpy.empty((casscf._naoaux, nmo*(nmo+1)//2))
    mo = numpy.asarray(mo, order='F')
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_s2')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    with df.load(casscf._cderi) as feri:
        for b0, b1 in dfhf.prange(0, casscf._naoaux, dfhf.BLOCKDIM):
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            # The driver writes straight into rows b0:b1 of `packed`.
            target = packed[b0:b1]
            fdrv(ftrans, fmmm,
                 target.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(b1-b0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            # Drop the reference to the block before the next one is loaded.
            eri1 = None
    return pyscf.lib.dot(packed.T, packed)
def half_e1(eri_ao, mo_coeffs, compact=True):
    r'''Half-transform the (ij| pair of AO integrals (ij|kl) with two sets
    of orbitals.

    Args:
        eri_ao : ndarray
            AO integrals with either 8-fold or 4-fold symmetry.  The input is
            treated as 4-fold when its size equals ``nao_pair**2`` and as
            8-fold otherwise.
        mo_coeffs : list of ndarray
            Two sets of orbital coefficients, for the i and j indices of
            (ij|kl).

    Kwargs:
        compact : bool
            When True, the returned integrals use the highest possible
            permutation symmetry.  When False, no permutation symmetry is
            used and the "plain" integrals are returned.

    Returns:
        ndarray of shape ``(nij_pair, nao_pair)`` holding the half-transformed
        integrals.  Whether the first index is symmetry-packed depends on the
        given orbitals and on ``compact``.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf import ao2mo
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> eri = _vhf.int2e_sph(mol._atm, mol._bas, mol._env)
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))

    >>> eri1 = ao2mo.incore.half_e1(eri, (mo1,mo2))
    >>> print(eri1.shape)
    (80, 28)
    >>> eri1 = ao2mo.incore.half_e1(eri, (mo1,mo2), compact=False)
    >>> print(eri1.shape)
    (80, 28)
    >>> eri1 = ao2mo.incore.half_e1(eri, (mo1,mo1))
    >>> print(eri1.shape)
    (55, 28)
    '''
    eri_ao = numpy.asarray(eri_ao, order='C')
    orb_i = mo_coeffs[0]
    orb_j = mo_coeffs[1]
    nmoi = orb_i.shape[1]
    nmoj = orb_j.shape[1]
    nao = orb_i.shape[0]
    nao_pair = nao*(nao+1)//2
    ijmosym, nij_pair, moij, ijshape = _conc_mos(orb_i, orb_j, compact)
    # Re-express the 2nd and 4th entries as differences from the 1st and 3rd
    # (presumably (start, stop) -> (start, count) -- confirm with _conc_mos).
    ijshape = (ijshape[0], ijshape[1]-ijshape[0],
               ijshape[2], ijshape[3]-ijshape[2])

    eri1 = numpy.empty((nij_pair, nao_pair))
    if nij_pair == 0:
        return eri1

    # Pick the transposition kernel from the symmetry of the AO integrals.
    if eri_ao.size == nao_pair**2:  # 4-fold symmetry
        trans_name = 'AO2MOtranse1_incore_s4'
    else:
        trans_name = 'AO2MOtranse1_incore_s8'
    ftrans = _ao2mo._fpointer(trans_name)

    # Pick the matrix-multiply kernel from the symmetry / sizes of the
    # orbital pair.
    if ijmosym == 's2':
        mmm_name = 'AO2MOmmm_nr_s2_s2'
    elif nmoi <= nmoj:
        mmm_name = 'AO2MOmmm_nr_s2_iltj'
    else:
        mmm_name = 'AO2MOmmm_nr_s2_igtj'
    fmmm = _ao2mo._fpointer(mmm_name)

    fdrv = getattr(_ao2mo.libao2mo, 'AO2MOnr_e1incore_drv')
    # One scratch array reused for every block of at most BLOCK AO pairs.
    scratch = numpy.empty((BLOCK, nij_pair))
    for blk0 in range(0, nao_pair, BLOCK):
        blk1 = min(blk0+BLOCK, nao_pair)
        nblk = blk1 - blk0
        buf = scratch[:nblk]
        fdrv(ftrans, fmmm,
             buf.ctypes.data_as(ctypes.c_void_p),
             eri_ao.ctypes.data_as(ctypes.c_void_p),
             moij.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(blk0), ctypes.c_int(nblk),
             ctypes.c_int(nao),
             ctypes.c_int(ijshape[0]), ctypes.c_int(ijshape[1]),
             ctypes.c_int(ijshape[2]), ctypes.c_int(ijshape[3]))
        # The block is stored transposed: columns blk0:blk1 of the result.
        eri1[:, blk0:blk1] = buf.T
    return eri1
def get_jk_(mf, mol, dms, hermi=1, with_j=True, with_k=True):
    """Compute J and K matrices from density-fitting (Cholesky) integrals.

    If ``mf._cderi`` is missing or ``None`` the 3-index tensor is generated
    first and cached on ``mf``: in memory when the estimate
    ``nao_pair*naoaux*8/1e6*2`` MB plus the current usage stays below 80% of
    ``mf.max_memory``, otherwise in a named temporary file.  ``mf._naoaux``
    is set as a side effect.

    Blocks are loaded with ``numpy.asarray(..., order='C')`` so that the raw
    pointer handed to the C driver always refers to C-contiguous memory.

    Args:
        mf: object providing ``stdout``, ``verbose``, ``auxbasis``,
            ``max_memory`` and (after the first call) ``_cderi``/``_naoaux``.
        mol: molecule object; ``mol.nao_nr()`` gives the AO dimension.
        dms: a single 2D density matrix or a sequence of them.  An empty
            sequence only triggers the generation of ``mf._cderi``.
        hermi: ``1`` selects the branch that eigen-decomposes each density
            matrix (``scipy.linalg.eigh``); any other value selects the
            general branch.
        with_j: whether J is accumulated in the ``hermi == 1`` branch.  The
            general branch always computes J.
        with_k: whether K is accumulated.

    Returns:
        ``([], [])`` for empty ``dms``; otherwise ``(vj, vk)``, each of shape
        ``(nao, nao)`` for a single density matrix or ``(nset, nao, nao)``.
    """
    t0 = (time.clock(), time.time())
    log = logger.Logger(mf.stdout, mf.verbose)
    if not hasattr(mf, '_cderi') or mf._cderi is None:
        nao = mol.nao_nr()
        nao_pair = nao*(nao+1)//2
        auxmol = df.incore.format_aux_basis(mol, mf.auxbasis)
        mf._naoaux = auxmol.nao_nr()
        if (nao_pair*mf._naoaux*8/1e6*2+pyscf.lib.current_memory()[0]
                < mf.max_memory*.8):
            mf._cderi = df.incore.cholesky_eri(mol, auxbasis=mf.auxbasis,
                                               verbose=log)
        else:
            mf._cderi = tempfile.NamedTemporaryFile()
            df.outcore.cholesky_eri(mol, mf._cderi.name,
                                    auxbasis=mf.auxbasis, verbose=log)
#            if (nao_pair*mf._naoaux*8/1e6+pyscf.lib.current_memory()[0]
#                < mf.max_memory*.9):
#                with df.load(mf._cderi) as feri:
#                    cderi = numpy.asarray(feri)
#                mf._cderi = cderi
    if mf._naoaux is None:
        # By overwriting mf._cderi, one can provide the Cholesky integrals
        # for "DF/RI" calculation
        with df.load(mf._cderi) as feri:
            mf._naoaux = feri.shape[0]

    if len(dms) == 0:
        return [], []

    cderi = mf._cderi
    nao = mol.nao_nr()
    fmmm = df.incore._fpointer('RIhalfmmm_nr_s2_bra')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')

    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = [dms]
        nset = 1
    else:
        nset = len(dms)
    vj = numpy.zeros((nset, nao, nao))
    vk = numpy.zeros((nset, nao, nao))

    #:vj = reduce(numpy.dot, (cderi.reshape(-1,nao*nao), dm.reshape(-1),
    #:                        cderi.reshape(-1,nao*nao))).reshape(nao,nao)
    if hermi == 1:
        # I cannot assume dm is positive definite because it might be the
        # density matrix difference when the mf.direct_scf flag is set.
        dmtril = []
        cpos = []
        cneg = []
        # Positions of the diagonal elements in the packed lower triangle.
        idx = numpy.arange(nao)
        diag_idx = idx*(idx+1)//2 + idx
        for k, dm in enumerate(dms):
            if with_j:
                dmtril.append(pyscf.lib.pack_tril(dm+dm.T))
                # dm+dm.T doubled the diagonal; halve it back.
                dmtril[k][diag_idx] *= .5

            if with_k:
                e, c = scipy.linalg.eigh(dm)
                pos = e > OCCDROP
                neg = e < -OCCDROP

                #:vk = numpy.einsum('pij,jk->kpi', cderi, c[:,abs(e)>OCCDROP])
                #:vk = numpy.einsum('kpi,kpj->ij', vk, vk)
                # Scale eigenvectors by sqrt(|eigenvalue|), split by sign.
                tmp = numpy.einsum('ij,j->ij', c[:, pos], numpy.sqrt(e[pos]))
                cpos.append(numpy.asarray(tmp, order='F'))
                tmp = numpy.einsum('ij,j->ij', c[:, neg],
                                   numpy.sqrt(-e[neg]))
                cneg.append(numpy.asarray(tmp, order='F'))
        if mf.verbose >= logger.DEBUG1:
            t1 = log.timer('Initialization', *t0)
        with df.load(cderi) as feri:
            buf = numpy.empty((BLOCKDIM*nao, nao))
            for b0, b1 in prange(0, mf._naoaux, BLOCKDIM):
                # asarray(order='C') only copies when the slice is not
                # already C-contiguous.
                eri1 = numpy.asarray(feri[b0:b1], order='C')
                if mf.verbose >= logger.DEBUG1:
                    t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
                for k in range(nset):
                    if with_j:
                        buf1 = reduce(numpy.dot, (eri1, dmtril[k], eri1))
                        vj[k] += pyscf.lib.unpack_tril(buf1, hermi)
                    if with_k and cpos[k].shape[1] > 0:
                        buf1 = buf[:(b1-b0)*cpos[k].shape[1]]
                        fdrv(ftrans, fmmm,
                             buf1.ctypes.data_as(ctypes.c_void_p),
                             eri1.ctypes.data_as(ctypes.c_void_p),
                             cpos[k].ctypes.data_as(ctypes.c_void_p),
                             ctypes.c_int(b1-b0), ctypes.c_int(nao),
                             ctypes.c_int(0), ctypes.c_int(cpos[k].shape[1]),
                             ctypes.c_int(0), ctypes.c_int(0))
                        vk[k] += pyscf.lib.dot(buf1.T, buf1)
                    if with_k and cneg[k].shape[1] > 0:
                        buf1 = buf[:(b1-b0)*cneg[k].shape[1]]
                        fdrv(ftrans, fmmm,
                             buf1.ctypes.data_as(ctypes.c_void_p),
                             eri1.ctypes.data_as(ctypes.c_void_p),
                             cneg[k].ctypes.data_as(ctypes.c_void_p),
                             ctypes.c_int(b1-b0), ctypes.c_int(nao),
                             ctypes.c_int(0), ctypes.c_int(cneg[k].shape[1]),
                             ctypes.c_int(0), ctypes.c_int(0))
                        # Negative-eigenvalue part enters with a minus sign.
                        vk[k] -= pyscf.lib.dot(buf1.T, buf1)
                if mf.verbose >= logger.DEBUG1:
                    t1 = log.timer('jk', *t1)
    else:
        #:vk = numpy.einsum('pij,jk->pki', cderi, dm)
        #:vk = numpy.einsum('pki,pkj->ij', cderi, vk)
        fcopy = df.incore._fpointer('RImmm_nr_s2_copy')
        rargs = (ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(0))
        dms = [numpy.asarray(dm, order='F') for dm in dms]
        if mf.verbose >= logger.DEBUG1:
            t1 = log.timer('Initialization', *t0)
        with df.load(cderi) as feri:
            buf = numpy.empty((2, BLOCKDIM, nao, nao))
            for b0, b1 in prange(0, mf._naoaux, BLOCKDIM):
                eri1 = numpy.asarray(feri[b0:b1], order='C')
                if mf.verbose >= logger.DEBUG1:
                    t1 = log.timer('load buf %d:%d' % (b0, b1), *t1)
                for k in range(nset):
                    buf1 = buf[0, :b1-b0]
                    fdrv(ftrans, fmmm,
                         buf1.ctypes.data_as(ctypes.c_void_p),
                         eri1.ctypes.data_as(ctypes.c_void_p),
                         dms[k].ctypes.data_as(ctypes.c_void_p),
                         ctypes.c_int(b1-b0), *rargs)
                    # rho[k] = trace of the k-th slice of buf1
                    rho = numpy.einsum('kii->k', buf1)
                    vj[k] += pyscf.lib.unpack_tril(numpy.dot(rho, eri1), 1)

                    if with_k:
                        buf2 = buf[1, :b1-b0]
                        fdrv(ftrans, fcopy,
                             buf2.ctypes.data_as(ctypes.c_void_p),
                             eri1.ctypes.data_as(ctypes.c_void_p),
                             dms[k].ctypes.data_as(ctypes.c_void_p),
                             ctypes.c_int(b1-b0), *rargs)
                        vk[k] += pyscf.lib.dot(buf1.reshape(-1, nao).T,
                                               buf2.reshape(-1, nao))
                if mf.verbose >= logger.DEBUG1:
                    t1 = log.timer('jk', *t1)

    if len(dms) == 1:
        vj = vj[0]
        vk = vk[0]
    logger.timer(mf, 'vj and vk', *t0)
    return vj, vk
def __init__(self, casscf, mo):
    """Build density-fitted CASSCF integrals and store them on ``self``.

    Attributes set here:
      * ``feri`` -- HDF5 file backed by ``self._tmpfile``, holding datasets
        ``ppaa`` of shape ``(nmo, nmo, ncas, ncas)`` and ``papa`` of shape
        ``(nmo, ncas, nmo, ncas)``;
      * ``j_pc``, ``k_pc`` -- arrays of shape ``(nmo, ncore)``;
      * ``vhf_c`` -- ``mo.T . (2*vj - vk) . mo`` where ``vj``/``vk`` come from
        ``casscf.get_jk`` for the density ``mo[:, :ncore] . mo[:, :ncore].T``.

    The scratch HDF5 file is opened with an explicit ``'w'`` mode (as
    ``self.feri`` is), because it is written to immediately and recent h5py
    versions open files read-only when no mode is given.

    Args:
        casscf: object providing ``stdout``, ``verbose``, ``mol``, ``ncore``,
            ``ncas``, ``max_memory``, ``_naoaux``, ``_cderi``, ``get_jk`` and
            ``_scf._tag_df`` (asserted to be truthy).
        mo: 2D orbital coefficient array of shape ``(nao, nmo)``.
    """
    assert(casscf._scf._tag_df)
    import gc
    gc.collect()
    log = logger.Logger(casscf.stdout, casscf.verbose)

    mol = casscf.mol
    nao, nmo = mo.shape
    ncore = casscf.ncore
    ncas = casscf.ncas
    nocc = ncore + ncas
    naoaux = casscf._naoaux

    mem_incore, mem_outcore, mem_basic = _mem_usage(ncore, ncas, nmo)
    mem_now = pyscf.lib.current_memory()[0]
    max_memory = max(3000, casscf.max_memory*.9-mem_now)
    if max_memory < mem_basic:
        log.warn('Calculation needs %d MB memory, over CASSCF.max_memory (%d MB) limit',
                 (mem_basic+mem_now)/.9, casscf.max_memory)

    t0 = (time.clock(), time.time())
    self._tmpfile = tempfile.NamedTemporaryFile()
    self.feri = h5py.File(self._tmpfile.name, 'w')
    self.ppaa = self.feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
    self.papa = self.feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
    self.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))

    mo = numpy.asarray(mo, order='F')
    _tmpfile1 = tempfile.NamedTemporaryFile()
    # Explicit write mode: the file is filled in pass 1 below.
    fxpp = h5py.File(_tmpfile1.name, 'w')
    bufpa = numpy.empty((naoaux, nmo, ncas))
    bufs1 = numpy.empty((dfhf.BLOCKDIM, nmo, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    t2 = t1 = t0
    fxpp_keys = []
    # Pass 1: process the tensor block by block, spill every block to fxpp
    # and keep the columns ncore:nocc of its last axis in bufpa.
    with df.load(casscf._cderi) as feri:
        for b0, b1 in dfhf.prange(0, naoaux, dfhf.BLOCKDIM):
            eri1 = numpy.asarray(feri[b0:b1], order='C')
            if log.verbose >= logger.DEBUG1:
                t2 = log.timer('load buf %d:%d' % (b0, b1), *t2)
            bufpp = bufs1[:b1-b0]
            fdrv(ftrans, fmmm,
                 bufpp.ctypes.data_as(ctypes.c_void_p),
                 eri1.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(b1-b0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_int(0), ctypes.c_int(nmo),
                 ctypes.c_void_p(0), ctypes.c_int(0))
            fxpp_keys.append([str(b0), b0, b1])
            # Stored with the auxiliary index last: (nmo, nmo, b1-b0).
            fxpp[str(b0)] = bufpp.transpose(1, 2, 0)
            bufpa[b0:b1] = bufpp[:, :, ncore:nocc]
            # bufd[k, i] = bufpp[k, i, i]
            bufd = numpy.einsum('kii->ki', bufpp)
            self.j_pc += numpy.einsum('ki,kj->ij', bufd, bufd[:, :ncore])
            k_cp += numpy.einsum('kij,kij->ij',
                                 bufpp[:, :ncore], bufpp[:, :ncore])
            if log.verbose >= logger.DEBUG1:
                t1 = log.timer('j_pc and k_pc', *t1)
            # Drop the reference to the block before the next one is loaded.
            eri1 = None
    self.k_pc = k_cp.T.copy()
    bufs1 = bufpp = None
    t1 = log.timer('density fitting ao2mo pass1', *t0)

    # Pass 2a: papa, in row blocks sized from the remaining memory budget.
    mem_now = pyscf.lib.current_memory()[0]
    nblk = int(max(8, min(nmo, ((max_memory-mem_now)*1e6/8-bufpa.size)/(ncas**2*nmo))))
    bufs1 = numpy.empty((nblk, ncas, nmo, ncas))
    dgemm = pyscf.lib.numpy_helper._dgemm
    for p0, p1 in prange(0, nmo, nblk):
        #tmp = numpy.dot(bufpa[:,p0:p1].reshape(naoaux,-1).T,
        #                bufpa.reshape(naoaux,-1))
        tmp = bufs1[:p1-p0]
        dgemm('T', 'N', (p1-p0)*ncas, nmo*ncas, naoaux,
              bufpa.reshape(naoaux, -1), bufpa.reshape(naoaux, -1),
              tmp.reshape(-1, nmo*ncas), 1, 0, p0*ncas, 0, 0)
        self.papa[p0:p1] = tmp.reshape(p1-p0, ncas, nmo, ncas)
    bufaa = bufpa[:, ncore:nocc, :].copy().reshape(-1, ncas**2)
    bufs1 = bufpa = None
    t1 = log.timer('density fitting papa pass2', *t1)

    # Pass 2b: ppaa, re-reading the spilled blocks from fxpp.
    mem_now = pyscf.lib.current_memory()[0]
    nblk = int(max(8, min(nmo, (max_memory-mem_now)*1e6/8/(nmo*naoaux+ncas**2*nmo))))
    bufs1 = numpy.empty((nblk, nmo, naoaux))
    bufs2 = numpy.empty((nblk, nmo, ncas, ncas))
    for p0, p1 in prange(0, nmo, nblk):
        nrow = p1 - p0
        buf = bufs1[:nrow]
        tmp = bufs2[:nrow].reshape(-1, ncas**2)
        # Reassemble the full auxiliary axis from the per-block datasets.
        for key, col0, col1 in fxpp_keys:
            buf[:nrow, :, col0:col1] = fxpp[key][p0:p1]
        pyscf.lib.dot(buf.reshape(-1, naoaux), bufaa, 1, tmp)
        self.ppaa[p0:p1] = tmp.reshape(p1-p0, nmo, ncas, ncas)
    bufs1 = bufs2 = buf = None
    t1 = log.timer('density fitting ppaa pass2', *t1)

    fxpp.close()
    self.feri.flush()

    dm_core = numpy.dot(mo[:, :ncore], mo[:, :ncore].T)
    vj, vk = casscf.get_jk(mol, dm_core)
    self.vhf_c = reduce(numpy.dot, (mo.T, vj*2-vk, mo))
    t0 = log.timer('density fitting ao2mo', *t0)
def get_jk(dfobj, mol, dms, hermi=1, vhfopt=None, with_j=True, with_k=True):
    """Compute J and K matrices from the blocks yielded by ``dfobj.loop()``.

    Args:
        dfobj: object providing ``stdout``, ``verbose``, ``blockdim`` and
            ``loop()``; each yielded block is read as ``(naux, nao_pair)``.
        mol: not used in this function.
        dms: a single 2D density matrix or a sequence of them.
        hermi: ``1`` selects the branch that eigen-decomposes each density
            matrix (``scipy.linalg.eigh``); any other value selects the
            general branch.
        vhfopt: not used in this function.
        with_j: whether J is accumulated in the ``hermi == 1`` branch.  The
            general branch always computes J.
        with_k: whether K is accumulated.

    Returns:
        ``([], [])`` for empty ``dms``; otherwise ``(vj, vk)``, each of shape
        ``(nao, nao)`` for a single density matrix or ``(nset, nao, nao)``.
    """
    t0 = t1 = (time.clock(), time.time())
    log = logger.Logger(dfobj.stdout, dfobj.verbose)
    if len(dms) == 0:
        return [], []
    elif isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        nset = 1
        dms = [dms]
    else:
        nset = len(dms)
    nao = dms[0].shape[0]

    fmmm = _ri._fpointer('RIhalfmmm_nr_s2_bra')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    vj = numpy.zeros((nset, nao, nao))
    vk = numpy.zeros((nset, nao, nao))
    null = pyscf.lib.c_null_ptr()

    #:vj = reduce(numpy.dot, (cderi.reshape(-1,nao*nao), dm.reshape(-1),
    #:                        cderi.reshape(-1,nao*nao))).reshape(nao,nao)
    if hermi == 1:
        # I cannot assume dm is positive definite because it might be the
        # density matrix difference when the mf.direct_scf flag is set.
        dmtril = []
        cpos = []
        cneg = []
        for k, dm in enumerate(dms):
            if with_j:
                packed = pyscf.lib.pack_tril(dm + dm.T)
                dmtril.append(packed)
                # dm + dm.T doubled the diagonal; halve the packed diagonal.
                row = numpy.arange(nao)
                packed[row * (row + 1) // 2 + row] *= .5

            if with_k:
                e, c = scipy.linalg.eigh(dm)
                keep_pos = e > OCCDROP
                keep_neg = e < -OCCDROP

                #:vk = numpy.einsum('pij,jk->kpi', cderi, c[:,abs(e)>OCCDROP])
                #:vk = numpy.einsum('kpi,kpj->ij', vk, vk)
                # Scale eigenvectors by sqrt(|eigenvalue|), split by sign.
                scaled = numpy.einsum('ij,j->ij', c[:, keep_pos],
                                      numpy.sqrt(e[keep_pos]))
                cpos.append(numpy.asarray(scaled, order='F'))
                scaled = numpy.einsum('ij,j->ij', c[:, keep_neg],
                                      numpy.sqrt(-e[keep_neg]))
                cneg.append(numpy.asarray(scaled, order='F'))

        scratch = numpy.empty((dfobj.blockdim * nao, nao))

        def _half_square(coeff, block, nrow):
            # Run the driver for `coeff` into scratch and return work.T . work.
            ncol = coeff.shape[1]
            work = scratch[:nrow * ncol]
            fdrv(ftrans, fmmm,
                 work.ctypes.data_as(ctypes.c_void_p),
                 block.ctypes.data_as(ctypes.c_void_p),
                 coeff.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(nrow), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(ncol),
                 ctypes.c_int(0), ctypes.c_int(0),
                 null, ctypes.c_int(0))
            return pyscf.lib.dot(work.T, work)

        for eri1 in dfobj.loop():
            naux, nao_pair = eri1.shape
            assert (nao_pair == nao * (nao + 1) // 2)
            for k in range(nset):
                if with_j:
                    jvec = reduce(numpy.dot, (eri1, dmtril[k], eri1))
                    vj[k] += pyscf.lib.unpack_tril(jvec, hermi)
                if with_k and cpos[k].shape[1] > 0:
                    vk[k] += _half_square(cpos[k], eri1, naux)
                if with_k and cneg[k].shape[1] > 0:
                    # Negative-eigenvalue part enters with a minus sign.
                    vk[k] -= _half_square(cneg[k], eri1, naux)
            t1 = log.timer_debug1('jk', *t1)
    else:
        #:vk = numpy.einsum('pij,jk->pki', cderi, dm)
        #:vk = numpy.einsum('pki,pkj->ij', cderi, vk)
        fcopy = _ri._fpointer('RImmm_nr_s2_copy')
        rargs = (ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(0),
                 null, ctypes.c_int(0))
        dms = [numpy.asarray(dm, order='F') for dm in dms]
        scratch = numpy.empty((2, dfobj.blockdim, nao, nao))
        for eri1 in dfobj.loop():
            naux, nao_pair = eri1.shape
            for k in range(nset):
                dm_ptr = dms[k].ctypes.data_as(ctypes.c_void_p)
                buf1 = scratch[0, :naux]
                fdrv(ftrans, fmmm,
                     buf1.ctypes.data_as(ctypes.c_void_p),
                     eri1.ctypes.data_as(ctypes.c_void_p),
                     dm_ptr,
                     ctypes.c_int(naux), *rargs)
                # rho[k] = trace of the k-th slice of buf1
                rho = numpy.einsum('kii->k', buf1)
                vj[k] += pyscf.lib.unpack_tril(numpy.dot(rho, eri1), 1)

                if with_k:
                    buf2 = scratch[1, :naux]
                    fdrv(ftrans, fcopy,
                         buf2.ctypes.data_as(ctypes.c_void_p),
                         eri1.ctypes.data_as(ctypes.c_void_p),
                         dm_ptr,
                         ctypes.c_int(naux), *rargs)
                    vk[k] += pyscf.lib.dot(buf1.reshape(-1, nao).T,
                                           buf2.reshape(-1, nao))
            t1 = log.timer_debug1('jk', *t1)

    if len(dms) == 1:
        vj = vj[0]
        vk = vk[0]
    logger.timer(dfobj, 'vj and vk', *t0)
    return vj, vk
def get_jk(dfobj, mol, dms, hermi=1, vhfopt=None, with_j=True, with_k=True):
    """Accumulate J and K matrices over the blocks of ``dfobj.loop()``.

    Args:
        dfobj: object providing ``stdout``, ``verbose``, ``blockdim`` and
            ``loop()``; each yielded block is read as ``(naux, nao_pair)``.
        mol: not used in this function.
        dms: a single 2D density matrix or a sequence of them.
        hermi: ``1`` selects the branch that eigen-decomposes each density
            matrix (``scipy.linalg.eigh``); any other value selects the
            general branch.
        vhfopt: not used in this function.
        with_j: whether J is accumulated in the ``hermi == 1`` branch.  The
            general branch always computes J.
        with_k: whether K is accumulated.

    Returns:
        ``([], [])`` for empty ``dms``; otherwise ``(vj, vk)``, each of shape
        ``(nao, nao)`` for a single density matrix or ``(nset, nao, nao)``.
    """
    t0 = t1 = (time.clock(), time.time())
    log = logger.Logger(dfobj.stdout, dfobj.verbose)
    if len(dms) == 0:
        return [], []
    elif isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        nset = 1
        dms = [dms]
    else:
        nset = len(dms)
    nao = dms[0].shape[0]

    fmmm = _ri._fpointer('RIhalfmmm_nr_s2_bra')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    vj = numpy.zeros((nset, nao, nao))
    vk = numpy.zeros((nset, nao, nao))
    null = pyscf.lib.c_null_ptr()

    #:vj = reduce(numpy.dot, (cderi.reshape(-1,nao*nao), dm.reshape(-1),
    #:                        cderi.reshape(-1,nao*nao))).reshape(nao,nao)
    if hermi == 1:
        # I cannot assume dm is positive definite because it might be the
        # density matrix difference when the mf.direct_scf flag is set.
        dmtril = []
        cpos = []
        cneg = []
        for k, dm in enumerate(dms):
            if with_j:
                dmtril.append(pyscf.lib.pack_tril(dm+dm.T))
                # dm+dm.T doubled the diagonal; halve the packed diagonal.
                diag = numpy.arange(nao)
                dmtril[k][diag*(diag+1)//2+diag] *= .5

            if with_k:
                eigval, eigvec = scipy.linalg.eigh(dm)
                is_pos = eigval > OCCDROP
                is_neg = eigval < -OCCDROP

                #:vk = numpy.einsum('pij,jk->kpi', cderi, c[:,abs(e)>OCCDROP])
                #:vk = numpy.einsum('kpi,kpj->ij', vk, vk)
                # Scale eigenvectors by sqrt(|eigenvalue|), split by sign.
                cpos.append(numpy.asarray(
                    numpy.einsum('ij,j->ij', eigvec[:, is_pos],
                                 numpy.sqrt(eigval[is_pos])),
                    order='F'))
                cneg.append(numpy.asarray(
                    numpy.einsum('ij,j->ij', eigvec[:, is_neg],
                                 numpy.sqrt(-eigval[is_neg])),
                    order='F'))

        workspace = numpy.empty((dfobj.blockdim*nao, nao))
        for eri1 in dfobj.loop():
            naux, nao_pair = eri1.shape
            assert(nao_pair == nao*(nao+1)//2)
            eri_ptr = eri1.ctypes.data_as(ctypes.c_void_p)
            for k in range(nset):
                if with_j:
                    jpacked = reduce(numpy.dot, (eri1, dmtril[k], eri1))
                    vj[k] += pyscf.lib.unpack_tril(jpacked, hermi)
                if with_k and cpos[k].shape[1] > 0:
                    npos = cpos[k].shape[1]
                    half = workspace[:naux*npos]
                    fdrv(ftrans, fmmm,
                         half.ctypes.data_as(ctypes.c_void_p),
                         eri_ptr,
                         cpos[k].ctypes.data_as(ctypes.c_void_p),
                         ctypes.c_int(naux), ctypes.c_int(nao),
                         ctypes.c_int(0), ctypes.c_int(npos),
                         ctypes.c_int(0), ctypes.c_int(0),
                         null, ctypes.c_int(0))
                    vk[k] += pyscf.lib.dot(half.T, half)
                if with_k and cneg[k].shape[1] > 0:
                    nneg = cneg[k].shape[1]
                    half = workspace[:naux*nneg]
                    fdrv(ftrans, fmmm,
                         half.ctypes.data_as(ctypes.c_void_p),
                         eri_ptr,
                         cneg[k].ctypes.data_as(ctypes.c_void_p),
                         ctypes.c_int(naux), ctypes.c_int(nao),
                         ctypes.c_int(0), ctypes.c_int(nneg),
                         ctypes.c_int(0), ctypes.c_int(0),
                         null, ctypes.c_int(0))
                    # Negative-eigenvalue part enters with a minus sign.
                    vk[k] -= pyscf.lib.dot(half.T, half)
            t1 = log.timer_debug1('jk', *t1)
    else:
        #:vk = numpy.einsum('pij,jk->pki', cderi, dm)
        #:vk = numpy.einsum('pki,pkj->ij', cderi, vk)
        fcopy = _ri._fpointer('RImmm_nr_s2_copy')
        rargs = (ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(0),
                 null, ctypes.c_int(0))
        dms = [numpy.asarray(dm, order='F') for dm in dms]
        workspace = numpy.empty((2, dfobj.blockdim, nao, nao))
        for eri1 in dfobj.loop():
            naux, nao_pair = eri1.shape
            for k in range(nset):
                first = workspace[0, :naux]
                fdrv(ftrans, fmmm,
                     first.ctypes.data_as(ctypes.c_void_p),
                     eri1.ctypes.data_as(ctypes.c_void_p),
                     dms[k].ctypes.data_as(ctypes.c_void_p),
                     ctypes.c_int(naux), *rargs)
                # rho[k] = trace of the k-th slice of `first`
                rho = numpy.einsum('kii->k', first)
                vj[k] += pyscf.lib.unpack_tril(numpy.dot(rho, eri1), 1)

                if with_k:
                    second = workspace[1, :naux]
                    fdrv(ftrans, fcopy,
                         second.ctypes.data_as(ctypes.c_void_p),
                         eri1.ctypes.data_as(ctypes.c_void_p),
                         dms[k].ctypes.data_as(ctypes.c_void_p),
                         ctypes.c_int(naux), *rargs)
                    vk[k] += pyscf.lib.dot(first.reshape(-1, nao).T,
                                           second.reshape(-1, nao))
            t1 = log.timer_debug1('jk', *t1)

    if len(dms) == 1:
        vj = vj[0]
        vk = vk[0]
    logger.timer(dfobj, 'vj and vk', *t0)
    return vj, vk
def __init__(self, casscf, mo, with_df):
    """Build density-fitted CASSCF integrals and store them on ``self``.

    Attributes set here:
      * ``feri`` -- HDF5 file backed by ``self._tmpfile``, holding datasets
        ``ppaa`` of shape ``(nmo, nmo, ncas, ncas)`` and ``papa`` of shape
        ``(nmo, ncas, nmo, ncas)``;
      * ``j_pc``, ``k_pc`` -- arrays of shape ``(nmo, ncore)``;
      * ``vhf_c`` -- ``mo.T . (2*vj - vk) . mo`` where ``vj``/``vk`` come from
        ``casscf.get_jk`` for the density ``mo[:, :ncore] . mo[:, :ncore].T``.

    The scratch HDF5 file is opened with an explicit ``'w'`` mode (as
    ``self.feri`` is), because it is written to immediately and recent h5py
    versions open files read-only when no mode is given.

    Args:
        casscf: object providing ``stdout``, ``verbose``, ``mol``, ``ncore``,
            ``ncas``, ``max_memory`` and ``get_jk``.
        mo: 2D orbital coefficient array of shape ``(nao, nmo)``.
        with_df: density-fitting object providing ``get_naoaux()``,
            ``blockdim`` and ``loop()``.
    """
    import gc
    gc.collect()
    log = logger.Logger(casscf.stdout, casscf.verbose)

    mol = casscf.mol
    nao, nmo = mo.shape
    ncore = casscf.ncore
    ncas = casscf.ncas
    nocc = ncore + ncas
    naoaux = with_df.get_naoaux()

    mem_incore, mem_outcore, mem_basic = _mem_usage(ncore, ncas, nmo)
    mem_now = pyscf.lib.current_memory()[0]
    max_memory = max(3000, casscf.max_memory*.9-mem_now)
    if max_memory < mem_basic:
        log.warn('Calculation needs %d MB memory, over CASSCF.max_memory (%d MB) limit',
                 (mem_basic+mem_now)/.9, casscf.max_memory)

    t1 = t0 = (time.clock(), time.time())
    self._tmpfile = tempfile.NamedTemporaryFile()
    self.feri = h5py.File(self._tmpfile.name, 'w')
    self.ppaa = self.feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
    self.papa = self.feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
    self.j_pc = numpy.zeros((nmo, ncore))
    k_cp = numpy.zeros((ncore, nmo))

    mo = numpy.asarray(mo, order='F')
    _tmpfile1 = tempfile.NamedTemporaryFile()
    # Explicit write mode: the file is filled in pass 1 below.
    fxpp = h5py.File(_tmpfile1.name, 'w')
    bufpa = numpy.empty((naoaux, nmo, ncas))
    bufs1 = numpy.empty((with_df.blockdim, nmo, nmo))
    fmmm = _ao2mo._fpointer('AO2MOmmm_nr_s2_iltj')
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo._fpointer('AO2MOtranse2_nr_s2')
    fxpp_keys = []
    b0 = 0
    # Pass 1: process the tensor block by block, spill every block to fxpp
    # and keep the columns ncore:nocc of its last axis in bufpa.
    for k, eri1 in enumerate(with_df.loop()):
        naux = eri1.shape[0]
        bufpp = bufs1[:naux]
        fdrv(ftrans, fmmm,
             bufpp.ctypes.data_as(ctypes.c_void_p),
             eri1.ctypes.data_as(ctypes.c_void_p),
             mo.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(naux), ctypes.c_int(nao),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_int(0), ctypes.c_int(nmo),
             ctypes.c_void_p(0), ctypes.c_int(0))
        fxpp_keys.append([str(k), b0, b0+naux])
        # Stored with the auxiliary index last: (nmo, nmo, naux).
        fxpp[str(k)] = bufpp.transpose(1, 2, 0)
        bufpa[b0:b0+naux] = bufpp[:, :, ncore:nocc]
        # bufd[k, i] = bufpp[k, i, i]
        bufd = numpy.einsum('kii->ki', bufpp)
        self.j_pc += numpy.einsum('ki,kj->ij', bufd, bufd[:, :ncore])
        k_cp += numpy.einsum('kij,kij->ij',
                             bufpp[:, :ncore], bufpp[:, :ncore])
        b0 += naux
        t1 = log.timer_debug1('j_pc and k_pc', *t1)
    self.k_pc = k_cp.T.copy()
    bufs1 = bufpp = None
    t1 = log.timer('density fitting ao2mo pass1', *t0)

    # Pass 2a: papa, in row blocks sized from the remaining memory budget.
    mem_now = pyscf.lib.current_memory()[0]
    nblk = int(max(8, min(nmo, ((max_memory-mem_now)*1e6/8-bufpa.size)/(ncas**2*nmo))))
    bufs1 = numpy.empty((nblk, ncas, nmo, ncas))
    dgemm = pyscf.lib.numpy_helper._dgemm
    for p0, p1 in prange(0, nmo, nblk):
        #tmp = numpy.dot(bufpa[:,p0:p1].reshape(naoaux,-1).T,
        #                bufpa.reshape(naoaux,-1))
        tmp = bufs1[:p1-p0]
        dgemm('T', 'N', (p1-p0)*ncas, nmo*ncas, naoaux,
              bufpa.reshape(naoaux, -1), bufpa.reshape(naoaux, -1),
              tmp.reshape(-1, nmo*ncas), 1, 0, p0*ncas, 0, 0)
        self.papa[p0:p1] = tmp.reshape(p1-p0, ncas, nmo, ncas)
    bufaa = bufpa[:, ncore:nocc, :].copy().reshape(-1, ncas**2)
    bufs1 = bufpa = None
    t1 = log.timer('density fitting papa pass2', *t1)

    # Pass 2b: ppaa, re-reading the spilled blocks from fxpp.
    mem_now = pyscf.lib.current_memory()[0]
    nblk = int(max(8, min(nmo, (max_memory-mem_now)*1e6/8/(nmo*naoaux+ncas**2*nmo))))
    bufs1 = numpy.empty((nblk, nmo, naoaux))
    bufs2 = numpy.empty((nblk, nmo, ncas, ncas))
    for p0, p1 in prange(0, nmo, nblk):
        nrow = p1 - p0
        buf = bufs1[:nrow]
        tmp = bufs2[:nrow].reshape(-1, ncas**2)
        # Reassemble the full auxiliary axis from the per-block datasets.
        for key, col0, col1 in fxpp_keys:
            buf[:nrow, :, col0:col1] = fxpp[key][p0:p1]
        pyscf.lib.dot(buf.reshape(-1, naoaux), bufaa, 1, tmp)
        self.ppaa[p0:p1] = tmp.reshape(p1-p0, nmo, ncas, ncas)
    bufs1 = bufs2 = buf = None
    t1 = log.timer('density fitting ppaa pass2', *t1)

    fxpp.close()
    self.feri.flush()

    dm_core = numpy.dot(mo[:, :ncore], mo[:, :ncore].T)
    vj, vk = casscf.get_jk(mol, dm_core)
    self.vhf_c = reduce(numpy.dot, (mo.T, vj*2-vk, mo))
    t0 = log.timer('density fitting ao2mo', *t0)