def lazy_kernel(ot, oneCDMs, twoCDM_amo, ao2amo, max_memory=20000, hermi=1,
                veff2_mo=None):
    ''' Get the 1- and 2-body effective potential from MC-PDFT.
    Eventually I'll be able to specify mo slices for the 2-body part

    Args:
        ot : an instance of otfnal class
        oneCDMs : ndarray of shape (2, nao, nao)
            containing spin-separated one-body density matrices
        twoCDM_amo : ndarray of shape (ncas, ncas, ncas, ncas)
            containing spin-summed two-body cumulant density matrix in an
            active space
        ao2amo : ndarray of shape (nao, ncas)
            containing molecular orbital coefficients for active-space
            orbitals

    Kwargs:
        max_memory : int or float
            maximum cache size in MB
            default is 20000
        hermi : int
            1 if 1CDMs are assumed hermitian, 0 otherwise
        veff2_mo : None
            placeholder for molecular-orbital slices of the 2-body part;
            any value other than None is rejected

    Returns:
        veff1 : ndarray with the shape and dtype of oneCDMs[0]
            1-body effective potential summed over all grid blocks
        veff2 : ndarray of shape (npair, npair), npair = nao*(nao+1)//2
            2-body effective potential summed over all grid blocks
            (requested from ot.get_veff_2body with aosym='s4')

    Raises:
        NotImplementedError : if veff2_mo is not None
    '''
    if veff2_mo is not None:
        raise NotImplementedError(
            'Molecular orbital slices for the two-body part')
    ni, xctype, dens_deriv = ot._numint, ot.xctype, ot.dens_deriv
    norbs_ao = ao2amo.shape[0]
    npair = norbs_ao * (norbs_ao + 1) // 2

    veff1 = np.zeros_like(oneCDMs[0])
    veff2 = np.zeros((npair, npair), dtype=veff1.dtype)

    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # counter used by the other routines in this file.
    t0 = (time.process_time(), time.time())
    # One rho evaluator per spin; element [0] of each result is the callable.
    make_rho = tuple(
        ni._gen_rho_evaluator(ot.mol, oneCDMs[i, :, :], hermi)
        for i in range(2))
    for ao, mask, weight, coords in ni.block_loop(ot.mol, ot.grids, norbs_ao,
                                                  dens_deriv, max_memory):
        rho = np.asarray([m[0](0, ao, mask, xctype) for m in make_rho])
        t0 = logger.timer(ot, 'untransformed density', *t0)
        Pi = get_ontop_pair_density(ot, rho, ao, oneCDMs, twoCDM_amo, ao2amo,
                                    dens_deriv, mask)
        t0 = logger.timer(ot, 'on-top pair density calculation', *t0)
        eot, vrho, vPi = ot.eval_ot(rho, Pi, weights=weight)
        t0 = logger.timer(ot, 'effective potential kernel calculation', *t0)
        veff1 += ot.get_veff_1body(rho, Pi, ao, weight, kern=vrho)
        t0 = logger.timer(ot, '1-body effective potential calculation', *t0)
        veff2 += ot.get_veff_2body(rho, Pi, ao, weight, aosym='s4', kern=vPi)
        t0 = logger.timer(ot, '2-body effective potential calculation', *t0)
    return veff1, veff2
def get_E_ot(ot, oneCDMs, twoCDM_amo, ao2amo, max_memory=20000, hermi=1):
    ''' E_MCPDFT = h_pq l_pq + 1/2 v_pqrs l_pq l_rs + E_ot[rho,Pi]
    or, in other terms,
    E_MCPDFT = T_KS[rho] + E_ext[rho] + E_coul[rho] + E_ot[rho, Pi]
             = E_DFT[1rdm] - E_xc[rho] + E_ot[rho, Pi]

    Args:
        ot : an instance of otfnal class
        oneCDMs : ndarray of shape (2, nao, nao)
            containing spin-separated one-body density matrices
        twoCDM_amo : ndarray of shape (ncas, ncas, ncas, ncas)
            containing spin-summed two-body cumulant density matrix in an
            active space
        ao2amo : ndarray of shape (nao, ncas)
            containing molecular orbital coefficients for active-space
            orbitals

    Kwargs:
        max_memory : int or float
            maximum cache size in MB
            default is 20000
        hermi : int
            1 if 1CDMs are assumed hermitian, 0 otherwise

    Returns : float
        The MC-PDFT on-top exchange-correlation energy, summed over all
        grid blocks (0.0 if the grid loop yields no blocks)
    '''
    ni, xctype, dens_deriv = ot._numint, ot.xctype, ot.dens_deriv
    norbs_ao = ao2amo.shape[0]

    E_ot = 0.0

    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # counter used by the other routines in this file.
    t0 = (time.process_time(), time.time())
    # One rho evaluator per spin; element [0] of each result is the callable.
    make_rho = tuple(
        ni._gen_rho_evaluator(ot.mol, oneCDMs[i, :, :], hermi)
        for i in range(2))
    for ao, mask, weight, coords in ni.block_loop(ot.mol, ot.grids, norbs_ao,
                                                  dens_deriv, max_memory):
        rho = np.asarray([m[0](0, ao, mask, xctype) for m in make_rho])
        if ot.verbose > logger.DEBUG and dens_deriv > 0:
            # Cross-check each Cartesian density derivative against a
            # direct einsum contraction of the density matrices.
            for ideriv in range(1, 4):
                rho_test = np.einsum('ijk,aj,ak->ia', oneCDMs, ao[ideriv],
                                     ao[0])
                rho_test += np.einsum('ijk,ak,aj->ia', oneCDMs, ao[ideriv],
                                      ao[0])
                logger.debug(ot,
                             "Spin-density derivatives, |PySCF-einsum| = %s",
                             linalg.norm(rho[:, ideriv, :] - rho_test))
        t0 = logger.timer(ot, 'untransformed density', *t0)
        Pi = get_ontop_pair_density(ot, rho, ao, oneCDMs, twoCDM_amo, ao2amo,
                                    dens_deriv)
        t0 = logger.timer(ot, 'on-top pair density calculation', *t0)
        E_ot += ot.get_E_ot(rho, Pi, weight)
        t0 = logger.timer(
            ot, 'on-top exchange-correlation energy calculation', *t0)
    return E_ot
def get_dens (d1, d2):
    ''' Evaluate the density, the on-top pair density, and the quadrature
    weights on the grid for a given pair of density matrices.

    Args:
        d1 : one-body density matrices handed to the rho evaluator and to
            get_ontop_pair_density (presumably spin-separated, since two
            densities are generated from it -- TODO confirm)
        d2 : passed to get_ontop_pair_density in the two-body cumulant
            density matrix position

    Returns:
        rho, Pi, weight : values from the last grid block processed
    '''
    # ni, ot, mc, ngrids and mo_cas are not defined here; they are read from
    # the enclosing scope.
    make_rho, nset, nao = ni._gen_rho_evaluator (ot.mol, d1, 1)
    # blksize=ngrids is requested, presumably so that the whole grid arrives
    # as a single block -- TODO confirm against ni.block_loop.
    for ao, mask, weight, coords in ni.block_loop (ot.mol, ot.grids, nao, ot.dens_deriv, mc.max_memory, blksize=ngrids):
        rho = np.asarray ([make_rho (i, ao, mask, ot.xctype) for i in range(2)])
        Pi = get_ontop_pair_density (ot, rho, ao, d1, d2, mo_cas, ot.dens_deriv, mask)
    # NOTE(review): the original indentation was lost; the return is placed
    # after the loop here, which is equivalent to returning inside it when
    # exactly one block is produced -- confirm.
    return rho, Pi, weight
def mcpdft_HellmanFeynman_grad(mc, ot, veff1, veff2, mo_coeff=None, ci=None,
                               atmlst=None, mf_grad=None, verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the
    Hellman-Feynman gradient terms of MC-PDFT. From the differentiated
    Hamiltonian matrix elements, only the core and Coulomb energy parts
    remain. For the renormalization terms, the effective Fock matrix is as in
    CASSCF, but with the same Hamiltonian substitution that is used for the
    energy response terms.

    Args:
        mc : object providing mol, ncore, ncas, nelecas, mo_coeff, ci,
            frozen, fcisolver, get_hcore, make_rdm1s and _scf
        ot : on-top functional object providing _numint, grids, xctype,
            dens_deriv, eval_ot, get_veff_1body and get_veff_2body_kl
        veff1 : 1-body effective potential, added to the core Hamiltonian
            in the AO basis
        veff2 : 2-body effective potential object providing vhf_c, ppaa
            and papa

    Kwargs:
        mo_coeff : MO coefficients; defaults to mc.mo_coeff
        ci : CI vector; defaults to mc.ci
        atmlst : atoms for which the gradient is computed; defaults to all
            atoms of mc.mol
        mf_grad : mean-field gradient object; defaults to
            mc._scf.nuc_grad_method()
        verbose : unused

    Returns:
        de : sum of the nuclear, hcore, Coulomb, renormalization, xc,
            quadrature-point and quadrature-weight gradient components,
            one row per atom in atmlst

    Raises:
        NotImplementedError : if mc.frozen is not None
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # counter used by the other routines in this file.
    t0 = (time.process_time(), time.time())
    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: I need to replace aapa with the equivalent array from veff2
    # I'm not sure how the outcore file-paging system works, but hopefully I
    # can do this
    # I also need to generate vhf_c and vhf_a from veff2 rather than the
    # molecule's actual integrals
    # The true Coulomb repulsion should already be in veff1, but I need to
    # generate the "fake" vj - vk/2 from veff2
    h1e_mo = mo_coeff.T @ (mc.get_hcore() + veff1) @ mo_coeff + veff2.vhf_c
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=h1e_mo.dtype)
    vhf_a = np.zeros((nmo, nmo), dtype=h1e_mo.dtype)
    for i in range(nmo):
        jbuf = veff2.ppaa[i]
        # NOTE(review): kbuf is read but never used below; the access is kept
        # so that the interaction with veff2 is unchanged.
        kbuf = veff2.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :]
        vhf_a[i] = np.tensordot(jbuf, casdm1, axes=2)
    # for this potential, vj = vk: vj - vk/2 = vj - vj/2 = vj/2
    vhf_a *= 0.5
    gfock = np.zeros((nmo, nmo))
    gfock[:, :ncore] = (h1e_mo[:, :ncore] + vhf_a[:, :ncore]) * 2
    gfock[:, ncore:nocc] = h1e_mo[:, ncore:nocc] @ casdm1
    gfock[:, ncore:nocc] += np.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    aapa = vhf_a = h1e_mo = gfock = None
    t0 = logger.timer(mc, 'PDFT HlFn gfock', *t0)

    dm1 = dm_core + dm_cas
    # MRH: vhf1c and vhf1a should be the TRUE vj_c and vj_a (no vk!)
    vj = mf_grad.get_jk(dm=dm1)[0]
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_coul = np.zeros((len(atmlst), 3))
    de_xc = np.zeros((len(atmlst), 3))
    de_grid = np.zeros((len(atmlst), 3))
    de_wgt = np.zeros((len(atmlst), 3))

    # MRH: Now I have to compute the gradient of the exchange-correlation
    # energy. This involves derivatives of the orbitals that construct rho and
    # Pi and therefore another set of potentials. It also involves the
    # derivatives of quadrature grid points which propagate through the
    # densities and therefore yet another set of potentials.
    # The orbital-derivative part includes all the grid points and some of the
    # orbitals (- sign); the grid-derivative part includes all of the orbitals
    # and some of the grid points (+ sign).
    # I'll do a loop over grid sections and make arrays of type (3,nao, nao)
    # and (3,nao, ncas, ncas, ncas).
    # I'll contract them within the grid loop for the grid derivatives and in
    # the following orbital loop for the xc derivatives
    dm1s = mc.make_rdm1s()
    casdm1s = np.stack(mc.fcisolver.make_rdm1s(ci, ncas, nelecas), axis=0)
    twoCDM = get_2CDM_from_2RDM(casdm2, casdm1s)
    casdm1s = None
    make_rho = tuple(
        ot._numint._gen_rho_evaluator(mol, dm1s[i], 1) for i in range(2))
    make_rho_c = ot._numint._gen_rho_evaluator(mol, dm_core, 1)
    make_rho_a = ot._numint._gen_rho_evaluator(mol, dm_cas, 1)
    # Term which should be contracted with the whole density matrix
    dv1 = np.zeros((3, nao, nao))
    # Term which should only be contracted with the core density matrix
    dv1_a = np.zeros((3, nao, nao))
    dv2 = np.zeros((3, nao))
    # For addressing particular ao derivatives
    idx = np.array([[1, 4, 5, 6], [2, 5, 7, 8], [3, 6, 8, 9]], dtype=np.int_)
    if ot.xctype == 'LDA':
        # For LDAs no second derivatives. The slice keeps idx two-dimensional:
        # idx[:, :ndpi] below raises IndexError on a 1-D index array.
        idx = idx[:, 0:1]
    diag_idx = np.arange(ncas)  # for puvx
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_pack = (casdm2 + casdm2.transpose(0, 1, 3, 2)).reshape(
        ncas**2, ncas, ncas)
    casdm2_pack = pack_tril(casdm2_pack).reshape(ncas, ncas, -1)
    casdm2_pack[:, :, diag_idx] *= 0.5
    # Map every atom of the molecule to its row in the output (-1 if the atom
    # is not in atmlst).
    full_atmlst = -np.ones(mol.natm, dtype=np.int_)
    t1 = logger.timer(mc, 'PDFT HlFn quadrature setup', *t0)
    for k, ia in enumerate(atmlst):
        full_atmlst[ia] = k
    for ia, (coords, w0, w1) in enumerate(
            rks_grad.grids_response_cc(ot.grids)):
        # For the xc potential derivative, I need every grid point in the
        # entire molecule regardless of atmlist. (Because that's about
        # orbitals.)
        # For the grid and weight derivatives, I only need the gridpoints that
        # are in atmlst
        mask = gen_grid.make_mask(mol, coords)
        # Need 1st derivs for LDA, 2nd for GGA, etc.
        ao = ot._numint.eval_ao(mol, coords, deriv=ot.dens_deriv + 1,
                                non0tab=mask)
        # Might confuse the rho and Pi generators if I don't slice this down
        if ot.xctype == 'LDA':
            aoval = ao[:1]
        elif ot.xctype == 'GGA':
            aoval = ao[:4]
        rho = np.asarray([m[0](0, aoval, mask, ot.xctype) for m in make_rho])
        Pi = get_ontop_pair_density(ot, rho, aoval, dm1s, twoCDM, mo_cas,
                                    ot.dens_deriv)

        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} rho/Pi calc'.format(ia), *t1)
        moval_occ = np.tensordot(aoval, mo_occ, axes=1)
        moval_cas = moval_occ[..., ncore:]
        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} ao2mo grid'.format(ia), *t1)
        eot, vrho, vot = ot.eval_ot(rho, Pi, weights=w0)
        ndpi = vot.shape[0]

        # Weight response
        de_wgt += np.tensordot(eot, w1[atmlst], axes=(0, 2))
        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} weight response'.format(ia),
            *t1)

        # Find the atoms that are a part of the atomlist - grid correction
        # shouldn't be added if they aren't there
        # The last stuff to vectorize is in get_veff_2body!
        k = full_atmlst[ia]

        # Vpq + Vpqii
        vrho = _contract_vot_rho(vot, make_rho_c[0](0, aoval, mask, ot.xctype),
                                 add_vrho=vrho)
        tmp_dv = np.stack([
            ot.get_veff_1body(rho, Pi, [ao[ix], aoval], w0, kern=vrho)
            for ix in idx
        ], axis=0)
        if k >= 0:
            de_grid[k] += 2 * np.tensordot(tmp_dv, dm1.T, axes=2)  # Grid response
        dv1 -= tmp_dv  # XC response
        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} Vpq + Vpqii'.format(ia), *t1)

        # Viiuv * Duv
        vrho_a = _contract_vot_rho(vot,
                                   make_rho_a[0](0, aoval, mask, ot.xctype))
        tmp_dv = np.stack([
            ot.get_veff_1body(rho, Pi, [ao[ix], aoval], w0, kern=vrho_a)
            for ix in idx
        ], axis=0)
        if k >= 0:
            de_grid[k] += 2 * np.tensordot(tmp_dv, dm_core.T, axes=2)  # Grid response
        dv1_a -= tmp_dv  # XC response
        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} Viiuv'.format(ia), *t1)

        # Vpuvx
        # ndpi, ngrids, ncas*(ncas+1)//2
        tmp_dv = ot.get_veff_2body_kl(rho, Pi, moval_cas, moval_cas, w0,
                                      symm=True, kern=vot)
        # ndpi, ngrids, ncas, ncas
        tmp_dv = np.tensordot(tmp_dv, casdm2_pack, axes=(-1, -1))
        # Chain and product rule
        tmp_dv[0] = (tmp_dv[:ndpi] * moval_cas[:ndpi, :, None, :]).sum(0)
        tmp_dv[1:ndpi] *= moval_cas[0, :, None, :]  # Chain and product rule
        tmp_dv = tmp_dv.sum(-1)  # ndpi, ngrids, ncas
        # comp, nao (orb), ncas (dm2)
        tmp_dv = np.tensordot(ao[idx[:, :ndpi]], tmp_dv,
                              axes=((1, 2), (0, 1)))
        tmp_dv = np.einsum('cpu,pu->cp', tmp_dv, mo_cas)  # comp, ncas
        if k >= 0:
            de_grid[k] += 2 * tmp_dv.sum(1)
        dv2 -= tmp_dv  # XC response
        t1 = logger.timer(
            mc, 'PDFT HlFn quadrature atom {} Vpuvx'.format(ia), *t1)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)  # MRH: this should be the TRUE hcore
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2
        de_coul[k] += np.einsum('xij,ij->x', vj[:, p0:p1], dm1[p0:p1]) * 2
        # Full quadrature, only some orbitals
        de_xc[k] += np.einsum('xij,ij->x', dv1[:, p0:p1], dm1[p0:p1]) * 2
        de_xc[k] += np.einsum('xij,ij->x', dv1_a[:, p0:p1],
                              dm_core[p0:p1]) * 2  # Ditto
        de_xc[k] += dv2[:, p0:p1].sum(1) * 2  # Ditto

    de_nuc = mf_grad.grad_nuc(mol, atmlst)

    logger.debug(mc, "MC-PDFT Hellmann-Feynman nuclear :\n{}".format(de_nuc))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman hcore component:\n{}".format(de_hcore))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman coulomb component:\n{}".format(de_coul))
    logger.debug(mc,
                 "MC-PDFT Hellmann-Feynman xc component:\n{}".format(de_xc))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman quadrature point component:\n{}".format(
            de_grid))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman quadrature weight component:\n{}".format(
            de_wgt))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman renorm component:\n{}".format(de_renorm))

    de = de_nuc + de_hcore + de_coul + de_renorm + de_xc + de_grid + de_wgt

    t1 = logger.timer(mc, 'PDFT HlFn total', *t0)

    return de
def mcpdft_HellmanFeynman_grad(mc, ot, veff1, veff2, mo_coeff=None, ci=None,
                               atmlst=None, mf_grad=None, verbose=None,
                               max_memory=None, auxbasis_response=False):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the
    Hellman-Feynman gradient terms of MC-PDFT. From the differentiated
    Hamiltonian matrix elements, only the core and Coulomb energy parts
    remain. For the renormalization terms, the effective Fock matrix is as in
    CASSCF, but with the same Hamiltonian substitution that is used for the
    energy response terms.

    Args:
        mc : object providing mol, ncore, ncas, nelecas, mo_coeff, ci,
            frozen, max_memory, fcisolver, get_hcore and _scf
        ot : on-top functional object providing _numint, grids, xctype,
            dens_deriv, Pi_deriv, eval_ot, get_veff_1body and
            get_veff_2body_kl
        veff1 : 1-body effective potential, added to the core Hamiltonian
            in the AO basis
        veff2 : 2-body effective potential object providing vhf_c, ppaa
            and papa

    Kwargs:
        mo_coeff : MO coefficients; defaults to mc.mo_coeff
        ci : CI vector; defaults to mc.ci
        atmlst : atoms for which the gradient is computed; defaults to all
            atoms of mc.mol
        mf_grad : mean-field gradient object; defaults to
            mc._scf.nuc_grad_method()
        verbose : unused
        max_memory : memory budget in MB used to size the grid slices;
            defaults to mc.max_memory
        auxbasis_response : if True, vj.aux is added to the gradient

    Returns:
        de : sum of the nuclear, hcore, Coulomb, renormalization, xc,
            quadrature-point and quadrature-weight gradient components
            (plus the aux component if requested), one row per atom in
            atmlst

    Raises:
        NotImplementedError : if mc.frozen is not None
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    if max_memory is None:
        max_memory = mc.max_memory
    t0 = (time.process_time(), time.time())
    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: I need to replace aapa with the equivalent array from veff2
    # I'm not sure how the outcore file-paging system works, but hopefully I
    # can do this
    # I also need to generate vhf_c and vhf_a from veff2 rather than the
    # molecule's actual integrals
    # The true Coulomb repulsion should already be in veff1, but I need to
    # generate the "fake" vj - vk/2 from veff2
    h1e_mo = mo_coeff.T @ (mc.get_hcore() + veff1) @ mo_coeff + veff2.vhf_c
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=h1e_mo.dtype)
    vhf_a = np.zeros((nmo, nmo), dtype=h1e_mo.dtype)
    for i in range(nmo):
        jbuf = veff2.ppaa[i]
        # NOTE(review): kbuf is read but never used below; the access is kept
        # so that the interaction with veff2 is unchanged.
        kbuf = veff2.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :]
        vhf_a[i] = np.tensordot(jbuf, casdm1, axes=2)
    # for this potential, vj = vk: vj - vk/2 = vj - vj/2 = vj/2
    vhf_a *= 0.5
    gfock = np.zeros((nmo, nmo))
    gfock[:, :ncore] = (h1e_mo[:, :ncore] + vhf_a[:, :ncore]) * 2
    gfock[:, ncore:nocc] = h1e_mo[:, ncore:nocc] @ casdm1
    gfock[:, ncore:nocc] += np.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    aapa = vhf_a = h1e_mo = gfock = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_coul = np.zeros((len(atmlst), 3))
    de_xc = np.zeros((len(atmlst), 3))
    de_grid = np.zeros((len(atmlst), 3))
    de_wgt = np.zeros((len(atmlst), 3))
    de_aux = np.zeros((len(atmlst), 3))

    t0 = logger.timer(mc, 'PDFT HlFn gfock', *t0)
    # From here on the orbitals, CI vector and occupations are the ones
    # returned by cas_natorb.
    mo_coeff, ci, mo_occup = cas_natorb(mc, mo_coeff=mo_coeff, ci=ci)
    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]
    dm1 = dm_core + dm_cas
    dm1 = tag_array(dm1, mo_coeff=mo_coeff, mo_occ=mo_occup)
    # MRH: vhf1c and vhf1a should be the TRUE vj_c and vj_a (no vk!)
    vj = mf_grad.get_jk(dm=dm1)[0]
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    if auxbasis_response:
        de_aux += vj.aux

    # MRH: Now I have to compute the gradient of the exchange-correlation
    # energy. This involves derivatives of the orbitals that construct rho and
    # Pi and therefore another set of potentials. It also involves the
    # derivatives of quadrature grid points which propagate through the
    # densities and therefore yet another set of potentials.
    # The orbital-derivative part includes all the grid points and some of the
    # orbitals (- sign); the grid-derivative part includes all of the orbitals
    # and some of the grid points (+ sign).
    # I'll do a loop over grid sections and make arrays of type (3,nao, nao)
    # and (3,nao, ncas, ncas, ncas).
    # I'll contract them within the grid loop for the grid derivatives and in
    # the following orbital loop for the xc derivatives
    # MRH, 05/09/2020: This just in - the actual spin density doesn't matter
    # at all in PDFT! I could probably save a fair amount of time by not
    # screwing around with the actual spin density! Also, the cumulant
    # decomposition can always be defined without the spin-density matrices
    # and it's still valid! But one thing at a time.
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    twoCDM = get_2CDM_from_2RDM(casdm2, casdm1)
    dm1s = np.stack((dm1 / 2.0, ) * 2, axis=0)
    dm1 = tag_array(dm1, mo_coeff=mo_occ, mo_occ=mo_occup[:nocc])
    make_rho = ot._numint._gen_rho_evaluator(mol, dm1, 1)[0]
    dvxc = np.zeros((3, nao))
    # For addressing particular ao derivatives
    idx = np.array([[1, 4, 5, 6], [2, 5, 7, 8], [3, 6, 8, 9]], dtype=np.int_)
    if ot.xctype == 'LDA':
        idx = idx[:, 0:1]  # For LDAs no second derivatives
    diag_idx = np.arange(ncas)  # for puvx
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_pack = (twoCDM + twoCDM.transpose(0, 1, 3, 2)).reshape(
        ncas**2, ncas, ncas)
    casdm2_pack = pack_tril(casdm2_pack).reshape(ncas, ncas, -1)
    casdm2_pack[:, :, diag_idx] *= 0.5
    # Map every atom of the molecule to its row in the output (-1 if the atom
    # is not in atmlst).
    full_atmlst = -np.ones(mol.natm, dtype=np.int_)
    t1 = logger.timer(mc, 'PDFT HlFn quadrature setup', *t0)
    for k, ia in enumerate(atmlst):
        full_atmlst[ia] = k
    for ia, (coords, w0, w1) in enumerate(
            rks_grad.grids_response_cc(ot.grids)):
        # For the xc potential derivative, I need every grid point in the
        # entire molecule regardless of atmlist. (Because that's about
        # orbitals.)
        # For the grid and weight derivatives, I only need the gridpoints that
        # are in atmlst
        # It is conceivable that I can make this more efficient by only doing
        # cross-combinations of grids and AOs, but I don't know how "mask"
        # works yet or how else I could do this.
        gc.collect()
        ngrids = coords.shape[0]
        ndao = (1, 4)[ot.dens_deriv]
        ndpi = (1, 4)[ot.Pi_deriv]
        ncols = 1.05 * 3 * (ndao * (nao + nocc)
                            + max(ndao * nao, ndpi * ncas * ncas))
        remaining_floats = (max_memory - current_memory()[0]) * 1e6 / 8
        blksize = int(remaining_floats / (ncols * BLKSIZE)) * BLKSIZE
        blksize = max(BLKSIZE, min(blksize, ngrids, BLKSIZE * 1200))
        t1 = logger.timer(
            mc,
            'PDFT HlFn quadrature atom {} mask and memory setup'.format(ia),
            *t1)
        for ip0 in range(0, ngrids, blksize):
            ip1 = min(ngrids, ip0 + blksize)
            # The screening mask must describe exactly the points handed to
            # eval_ao and the density generators. A mask built once for all
            # of this atom's points would be misaligned for every slice with
            # ip0 > 0, so build it per slice.
            mask = gen_grid.make_mask(mol, coords[ip0:ip1])
            logger.info(
                mc, 'PDFT gradient atom {} slice {}-{} of {} total'.format(
                    ia, ip0, ip1, ngrids))
            # Need 1st derivs for LDA, 2nd for GGA, etc.
            ao = ot._numint.eval_ao(mol, coords[ip0:ip1],
                                    deriv=ot.dens_deriv + 1, non0tab=mask)
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} ao grids'.format(ia), *t1)
            # Might confuse the rho and Pi generators if I don't slice this
            # down
            if ot.xctype == 'LDA':
                aoval = ao[0]
            if ot.xctype == 'GGA':
                aoval = ao[:4]
            rho = make_rho(0, aoval, mask, ot.xctype) / 2.0
            rho = np.stack((rho, ) * 2, axis=0)
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} rho calc'.format(ia), *t1)
            Pi = get_ontop_pair_density(ot, rho, aoval, dm1s, twoCDM, mo_cas,
                                        ot.dens_deriv, mask)
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} Pi calc'.format(ia), *t1)

            # TODO: consistent format requirements for shape of ao grid
            if ot.xctype == 'LDA':
                aoval = ao[:1]
            moval_occ = _grid_ao2mo(mol, aoval, mo_occ, mask)
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} ao2mo grid'.format(ia), *t1)
            aoval = np.ascontiguousarray([
                ao[ix].transpose(0, 2, 1) for ix in idx[:, :ndao]
            ]).transpose(0, 1, 3, 2)
            ao = None
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} ao grid reshape'.format(ia),
                *t1)

            eot, vot = ot.eval_ot(rho, Pi, weights=w0[ip0:ip1])[:2]
            vrho, vPi = vot
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} eval_ot'.format(ia), *t1)
            puvx_mem = 2 * ndpi * (ip1 - ip0) * ncas * ncas * 8 / 1e6
            remaining_mem = max_memory - current_memory()[0]
            logger.info(
                mc,
                'PDFT gradient memory note: working on {} grid points; estimated puvx usage = {:.1f} of {:.1f} remaining MB'
                .format((ip1 - ip0), puvx_mem, remaining_mem))

            # Weight response
            de_wgt += np.tensordot(eot, w1[atmlst, ..., ip0:ip1],
                                   axes=(0, 2))
            t1 = logger.timer(
                mc,
                'PDFT HlFn quadrature atom {} weight response'.format(ia),
                *t1)

            # Find the atoms that are a part of the atomlist - grid correction
            # shouldn't be added if they aren't there
            # The last stuff to vectorize is in get_veff_2body!
            k = full_atmlst[ia]

            # Vpq + Vpqrs * Drs ; I'm not sure why the list comprehension down
            # there doesn't break ao's stride order but I'm not complaining
            vrho = _contract_vot_rho(vPi, rho.sum(0), add_vrho=vrho)
            tmp_dv = np.stack([
                ot.get_veff_1body(rho, Pi, [ao_i, moval_occ], w0[ip0:ip1],
                                  kern=vrho)
                for ao_i in aoval
            ], axis=0)
            tmp_dv = (tmp_dv * mo_occ[None, :, :]
                      * mo_occup[None, None, :nocc]).sum(2)
            if k >= 0:
                de_grid[k] += 2 * tmp_dv.sum(1)  # Grid response
            dvxc -= tmp_dv  # XC response
            vrho = tmp_dv = None
            t1 = logger.timer(
                mc,
                'PDFT HlFn quadrature atom {} Vpq + Vpqrs * Drs'.format(ia),
                *t1)

            # Vpuvx * Lpuvx ; remember the stupid slowest->fastest->medium
            # stride order of the ao grid arrays
            moval_cas = moval_occ = np.ascontiguousarray(
                moval_occ[..., ncore:].transpose(0, 2, 1)).transpose(0, 2, 1)
            # ndpi, ngrids, ncas*(ncas+1)//2
            tmp_dv = ot.get_veff_2body_kl(rho, Pi, moval_cas, moval_cas,
                                          w0[ip0:ip1], symm=True, kern=vPi)
            # ndpi, ngrids, ncas, ncas
            tmp_dv = np.tensordot(tmp_dv, casdm2_pack, axes=(-1, -1))
            # Chain and product rule
            tmp_dv[0] = (tmp_dv[:ndpi] * moval_cas[:ndpi, :, None, :]).sum(0)
            tmp_dv[1:ndpi] *= moval_cas[0, :, None, :]  # Chain and product rule
            tmp_dv = tmp_dv.sum(-1)  # ndpi, ngrids, ncas
            # comp, nao (orb), ncas (dm2)
            tmp_dv = np.tensordot(aoval[:, :ndpi], tmp_dv,
                                  axes=((1, 2), (0, 1)))
            # comp, ncas (it's ok to not vectorize this b/c the quadrature
            # grid is gone)
            tmp_dv = np.einsum('cpu,pu->cp', tmp_dv, mo_cas)
            if k >= 0:
                de_grid[k] += 2 * tmp_dv.sum(1)  # Grid response
            dvxc -= tmp_dv  # XC response
            tmp_dv = None
            t1 = logger.timer(
                mc, 'PDFT HlFn quadrature atom {} Vpuvx * Lpuvx'.format(ia),
                *t1)

            # Drop references to the per-slice arrays before the next slice
            rho = Pi = eot = vot = vPi = aoval = moval_occ = moval_cas = None
            gc.collect()

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)  # MRH: this should be the TRUE hcore
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2
        de_coul[k] += np.einsum('xij,ij->x', vj[:, p0:p1], dm1[p0:p1]) * 2
        # Full quadrature, only some orbitals
        de_xc[k] += dvxc[:, p0:p1].sum(1) * 2

    de_nuc = mf_grad.grad_nuc(mol, atmlst)

    logger.debug(mc, "MC-PDFT Hellmann-Feynman nuclear :\n{}".format(de_nuc))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman hcore component:\n{}".format(de_hcore))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman coulomb component:\n{}".format(de_coul))
    logger.debug(mc,
                 "MC-PDFT Hellmann-Feynman xc component:\n{}".format(de_xc))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman quadrature point component:\n{}".format(
            de_grid))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman quadrature weight component:\n{}".format(
            de_wgt))
    logger.debug(
        mc, "MC-PDFT Hellmann-Feynman renorm component:\n{}".format(de_renorm))

    de = de_nuc + de_hcore + de_coul + de_renorm + de_xc + de_grid + de_wgt

    if auxbasis_response:
        de += de_aux
        logger.debug(
            mc, "MC-PDFT Hellmann-Feynman aux component:\n{}".format(de_aux))

    t1 = logger.timer(mc, 'PDFT HlFn total', *t0)

    return de
def kernel(ot, oneCDMs_amo, twoCDM_amo, mo_coeff, ncore, ncas, max_memory=2000,
           hermi=1, paaa_only=False, aaaa_only=False):
    ''' Build the MC-PDFT 1- and 2-body effective potentials by quadrature.

    Args:
        ot : an instance of otfnal class
        oneCDMs_amo : ndarray of shape (2, ncas, ncas)
            spin-separated one-body density matrices in the active space
        twoCDM_amo : ndarray of shape (ncas, ncas, ncas, ncas)
            spin-summed two-body cumulant density matrix in the active space
        mo_coeff : ndarray of shape (nao, nmo)
            molecular orbital coefficients
        ncore : integer
            number of inactive orbitals
        ncas : integer
            number of active orbitals

    Kwargs:
        max_memory : int or float
            maximum cache size in MB; default is 2000
        hermi : int
            1 if 1CDMs are assumed hermitian, 0 otherwise
        paaa_only : logical
            If true, only compute the paaa range of papa and ppaa
            (all other elements set to zero)
        aaaa_only : logical
            If true, only compute the aaaa range of papa and ppaa
            (all other elements set to zero; overrides paaa_only)

    Returns:
        veff1 : ndarray of shape (nao, nao)
            1-body effective potential
        veff2 : object of class pdft_veff._ERIS
            2-body effective potential and related quantities
    '''
    numint, xc_kind, rho_deriv = ot._numint, ot.xctype, ot.dens_deriv
    nocc = ncore + ncas
    nao = mo_coeff.shape[0]
    core_orbs = mo_coeff[:, :ncore]
    ao2amo = mo_coeff[:, ncore:nocc]
    shls_slice = (0, ot.mol.nbas)
    ao_loc = ot.mol.ao_loc_nr()

    veff1 = np.zeros((nao, nao), dtype=oneCDMs_amo.dtype)
    veff2 = _ERIS(ot.mol, mo_coeff, ncore, ncas, paaa_only=paaa_only,
                  aaaa_only=aaaa_only, verbose=ot.verbose, stdout=ot.stdout)

    t0 = (time.process_time(), time.time())

    # Density matrices, each tagged with its own eigendecomposition because
    # that speeds up the rho generators.
    dm_core = core_orbs @ core_orbs.T
    dm_cas = np.dot(ao2amo, np.dot(oneCDMs_amo, ao2amo.T)).transpose(1, 0, 2)
    # The per-spin total uses the singly-weighted core matrix, so it must be
    # formed before the core matrix is doubled.
    dm1s = dm_cas + dm_core[None, :, :]
    dm_core *= 2

    # Core: every inactive orbital carries occupation 2.
    core_occ = np.ones(ncore, dtype=dm_core.dtype) * 2.0
    dm_core = tag_array(dm_core, mo_coeff=core_orbs, mo_occ=core_occ)

    # Active space: diagonalize each spin block separately.
    amo_occ = np.zeros((2, ncas), dtype=dm_cas.dtype)
    amo_coeff = np.stack([ao2amo, ao2amo], axis=0)
    for spin in (0, 1):
        evals, evecs = linalg.eigh(oneCDMs_amo[spin])
        amo_occ[spin] = evals
        amo_coeff[spin] = amo_coeff[spin] @ evecs
    dm_cas = tag_array(dm_cas, mo_coeff=amo_coeff, mo_occ=amo_occ)

    # Total per-spin density: unit core occupations plus the active ones.
    occ_s = np.zeros((2, nocc), dtype=dm1s.dtype)
    occ_s[:, :ncore] = 1.0
    occ_s[:, ncore:nocc] = amo_occ
    coeff_s = np.stack([mo_coeff[:, :nocc], mo_coeff[:, :nocc]], axis=0)
    coeff_s[:, :, ncore:nocc] = amo_coeff
    dm1s = tag_array(dm1s, mo_coeff=coeff_s, mo_occ=occ_s)

    # rho generators (only the callable of each returned triple is used)
    make_rho_c, _, _ = numint._gen_rho_evaluator(ot.mol, dm_core, hermi)
    make_rho_a, _, _ = numint._gen_rho_evaluator(ot.mol, dm_cas, hermi)
    make_rho, _, _ = numint._gen_rho_evaluator(ot.mol, dm1s, hermi)

    # Choose the grid block size from the memory that is still available
    gc.collect()
    free_floats = (max_memory - current_memory()[0]) * 1e6 / 8
    nd_rho = (1, 4, 10)[rho_deriv]  # ?? for meta-GGA
    nd_Pi = (1, 4)[ot.Pi_deriv]
    cols_grid = 4 + nd_rho * nao  # ao, weight, coords
    cols_dens = nd_rho * 4 + nd_Pi  # rho, rho_a, rho_c, Pi
    cols_kern = 1 + nd_rho + nd_Pi  # eot, vot
    # Asynchronous part: only the larger of the two footprints counts
    ftpt_veff1 = nd_rho * (nao + 1)  # footprint of get_veff_1body
    ftpt_veff2 = veff2._accumulate_ftpt() * nd_Pi
    ncols = (cols_grid + cols_dens + cols_kern
             + np.amax([ftpt_veff1, ftpt_veff2]))
    pdft_blksize = int(free_floats / (ncols * BLKSIZE)) * BLKSIZE
    if ot.grids.coords is None:
        ot.grids.build(with_non0tab=True)
    ngrids = ot.grids.coords.shape[0]
    pdft_blksize = max(BLKSIZE, min(pdft_blksize, ngrids, BLKSIZE * 1200))
    logger.debug(
        ot,
        '{} MB used of {} available; block size of {} chosen for grid with {} points'
        .format(current_memory()[0], max_memory, pdft_blksize, ngrids))

    # Accumulate both potentials block by block
    for ao, mask, weight, _ in numint.block_loop(ot.mol, ot.grids, nao,
                                                 rho_deriv, max_memory,
                                                 blksize=pdft_blksize):
        rho = np.asarray([make_rho(s, ao, mask, xc_kind) for s in range(2)])
        rho_a = sum(make_rho_a(s, ao, mask, xc_kind) for s in range(2))
        rho_c = make_rho_c(0, ao, mask, xc_kind)
        t0 = logger.timer(ot, 'untransformed densities (core and total)', *t0)
        Pi = get_ontop_pair_density(ot, rho, ao, dm1s, twoCDM_amo, ao2amo,
                                    rho_deriv, mask)
        t0 = logger.timer(ot, 'on-top pair density calculation', *t0)
        eot, vot = ot.eval_ot(rho, Pi, weights=weight)[:2]
        vrho, vPi = vot
        t0 = logger.timer(ot, 'effective potential kernel calculation', *t0)
        # TODO: consistent format req's ao LDA case
        if ao.ndim == 2:
            ao = ao[None, :, :]
        veff1 += ot.get_veff_1body(rho, Pi, ao, weight, non0tab=mask,
                                   shls_slice=shls_slice, ao_loc=ao_loc,
                                   hermi=1, kern=vrho)
        t0 = logger.timer(ot, '1-body effective potential calculation', *t0)
        veff2._accumulate(ot, rho, Pi, ao, weight, rho_c, rho_a, vPi, mask,
                          shls_slice, ao_loc)
        t0 = logger.timer(ot, '2-body effective potential calculation', *t0)

    veff2._finalize()
    t0 = logger.timer(ot, 'Finalizing 2-body effective potential calculation',
                      *t0)
    return veff1, veff2
def kernel(ot, oneCDMs_amo, twoCDM_amo, mo_coeff, ncore, ncas,
           max_memory=20000, hermi=1, veff2_mo=None, paaa_only=False):
    ''' Get the 1- and 2-body effective potential from MC-PDFT.
    Eventually I'll be able to specify mo slices for the 2-body part

    Args:
        ot : an instance of otfnal class
        oneCDMs_amo : ndarray of shape (2, ncas, ncas)
            containing spin-separated one-body density matrices
        twoCDM_amo : ndarray of shape (ncas, ncas, ncas, ncas)
            containing spin-summed two-body cumulant density matrix in an
            active space
        mo_coeff : ndarray of shape (nao, nmo)
            containing molecular orbital coefficients
        ncore : integer
            number of inactive orbitals
        ncas : integer
            number of active orbitals

    Kwargs:
        max_memory : int or float
            maximum cache size in MB
            default is 20000
        hermi : int
            1 if 1CDMs are assumed hermitian, 0 otherwise
        veff2_mo : None
            placeholder for molecular-orbital slices of the 2-body part;
            any value other than None is rejected
        paaa_only : logical
            forwarded to the _ERIS constructor; also selects the smaller
            per-grid-point memory estimate used to size the grid blocks

    Returns:
        veff1 : ndarray of shape (nao, nao)
            1-body effective potential
        veff2 : _ERIS object
            2-body effective potential, after _finalize() has been called

    Raises:
        NotImplementedError : if veff2_mo is not None
    '''
    if veff2_mo is not None:
        raise NotImplementedError(
            'Molecular orbital slices for the two-body part')
    nocc = ncore + ncas
    ni, xctype, dens_deriv = ot._numint, ot.xctype, ot.dens_deriv
    norbs_ao = mo_coeff.shape[0]
    mo_core = mo_coeff[:, :ncore]
    ao2amo = mo_coeff[:, ncore:nocc]

    veff1 = np.zeros((norbs_ao, norbs_ao), dtype=oneCDMs_amo.dtype)
    veff2 = _ERIS(ot.mol, mo_coeff, ncore, ncas, paaa_only=paaa_only,
                  verbose=ot.verbose, stdout=ot.stdout)

    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # counter used by the other routines in this file.
    t0 = (time.process_time(), time.time())
    dm_core = mo_core @ mo_core.T
    dm_cas = np.dot(ao2amo, np.dot(oneCDMs_amo, ao2amo.T)).transpose(1, 0, 2)
    # The per-spin total uses the singly-weighted core matrix, so it must be
    # formed before the core matrix is doubled.
    dm1s = dm_cas + dm_core[None, :, :]
    dm_core *= 2
    # Can't trust that NOs are the same for alpha and beta. Have to do this
    # explicitly here
    # Begin tag block: dm_core
    imo_occ = np.ones(ncore, dtype=dm_core.dtype) * 2.0
    dm_core = tag_array(dm_core, mo_coeff=mo_core, mo_occ=imo_occ)
    # Begin tag block: dm_cas
    amo_occ = np.zeros((2, ncas), dtype=dm_cas.dtype)
    amo_coeff = np.stack([ao2amo.copy(), ao2amo.copy()], axis=0)
    for i in range(2):
        amo_occ[i], ua = linalg.eigh(oneCDMs_amo[i])
        amo_coeff[i] = amo_coeff[i] @ ua
    dm_cas = tag_array(dm_cas, mo_coeff=amo_coeff, mo_occ=amo_occ)
    # Begin tag block: dm1s
    mo_occ = np.zeros((2, nocc), dtype=dm1s.dtype)
    mo_occ[:, :ncore] = 1.0
    mo_occ[:, ncore:nocc] = amo_occ
    tag_coeff = np.stack(
        (mo_coeff[:, :nocc].copy(), mo_coeff[:, :nocc].copy()), axis=0)
    tag_coeff[:, :, ncore:nocc] = amo_coeff
    dm1s = tag_array(dm1s, mo_coeff=tag_coeff, mo_occ=mo_occ)
    # End tag block

    # Only the callable of each returned triple is used.
    make_rho_c, _, _ = ni._gen_rho_evaluator(ot.mol, dm_core, hermi)
    make_rho_a, _, _ = ni._gen_rho_evaluator(ot.mol, dm_cas, hermi)
    make_rho, _, _ = ni._gen_rho_evaluator(ot.mol, dm1s, hermi)

    # Choose the grid block size from the memory that is still available.
    gc.collect()
    remaining_floats = (max_memory - current_memory()[0]) * 1e6 / 8
    nderiv_rho = (1, 4, 10)[dens_deriv]  # ?? for meta-GGA
    nderiv_Pi = (1, 4)[ot.Pi_deriv]
    if paaa_only:
        ncols_v2 = norbs_ao * ncas + ncas**2
    else:
        ncols_v2 = 2 * norbs_ao * ncas
    ncols = (1 + nderiv_rho * (5 + norbs_ao * 2)
             + nderiv_Pi * (1 + ncols_v2))
    # Rounded down to a whole number of BLKSIZE units
    pdft_blksize = int(remaining_floats / (ncols * BLKSIZE)) * BLKSIZE
    if ot.grids.coords is None:
        ot.grids.build(with_non0tab=True)
    ngrids = ot.grids.coords.shape[0]
    pdft_blksize = max(BLKSIZE, min(pdft_blksize, ngrids, BLKSIZE * 1200))
    logger.debug(
        ot,
        '{} MB used of {} available; block size of {} chosen for grid with {} points'
        .format(current_memory()[0], max_memory, pdft_blksize, ngrids))
    shls_slice = (0, ot.mol.nbas)
    ao_loc = ot.mol.ao_loc_nr()
    for ao, mask, weight, coords in ni.block_loop(ot.mol, ot.grids, norbs_ao,
                                                  dens_deriv, max_memory,
                                                  blksize=pdft_blksize):
        rho = np.asarray([make_rho(i, ao, mask, xctype) for i in range(2)])
        rho_a = np.asarray(
            [make_rho_a(i, ao, mask, xctype) for i in range(2)])
        rho_c = make_rho_c(0, ao, mask, xctype)
        t0 = logger.timer(ot, 'untransformed densities (core and total)', *t0)
        Pi = get_ontop_pair_density(ot, rho, ao, dm1s, twoCDM_amo, ao2amo,
                                    dens_deriv, mask)
        t0 = logger.timer(ot, 'on-top pair density calculation', *t0)
        eot, vrho, vPi = ot.eval_ot(rho, Pi, weights=weight)
        t0 = logger.timer(ot, 'effective potential kernel calculation', *t0)
        veff1 += ot.get_veff_1body(rho, Pi, ao, weight, non0tab=mask,
                                   shls_slice=shls_slice, ao_loc=ao_loc,
                                   hermi=1, kern=vrho)
        t0 = logger.timer(ot, '1-body effective potential calculation', *t0)
        veff2._accumulate(ot, rho, Pi, ao, weight, rho_c, rho_a, vPi, mask,
                          shls_slice, ao_loc)
        t0 = logger.timer(ot, '2-body effective potential calculation', *t0)
    veff2._finalize()
    t0 = logger.timer(ot, 'Finalizing 2-body effective potential calculation',
                      *t0)
    return veff1, veff2