def ao2mo(self, mo_coeffs, compact=True):
    '''Build MO-basis integrals from the blocks yielded by ``self.loop()``.

    Args:
        mo_coeffs: a single 2D ndarray, in which case it is used for all four
            orbital indices, or an indexable object whose entries 0..3 are
            the coefficient sets for the i, j, k and l indices.
        compact: forwarded to ``_conc_mos`` for both the (ij) and (kl) pair.

    Returns:
        ndarray of shape (nij_pair, nkl_pair). It starts as zeros and every
        block adds its ``Lij.T`` x ``Lkl`` contribution through ``lib.dot``.
    '''
    one_orbital_set = isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2
    if one_orbital_set:
        mo_coeffs = (mo_coeffs, ) * 4

    orb_i, orb_j = mo_coeffs[0], mo_coeffs[1]
    orb_k, orb_l = mo_coeffs[2], mo_coeffs[3]
    ijmosym, nij_pair, moij, ijslice = _conc_mos(orb_i, orb_j, compact)
    klmosym, nkl_pair, mokl, klslice = _conc_mos(orb_k, orb_l, compact)
    mo_eri = numpy.zeros((nij_pair, nkl_pair))

    # When bra and ket use the same orbitals the ket transformation is skipped
    # and the bra result is reused.
    bra_is_ket = iden_coeffs(orb_i, orb_k) and iden_coeffs(orb_j, orb_l)

    bra = ket = None
    for block in self.loop():
        bra = _ao2mo.nr_e2(block, moij, ijslice, aosym='s2',
                           mosym=ijmosym, out=bra)
        if bra_is_ket:
            ket = bra
        else:
            ket = _ao2mo.nr_e2(block, mokl, klslice, aosym='s2',
                               mosym=klmosym, out=ket)
        lib.dot(bra.T, ket, 1, mo_eri, 1)
    return mo_eri
def ao2mo(self, mo_coeffs, compact=True):
    '''Accumulate MO-basis integrals over the blocks of ``self.loop()``.

    Args:
        mo_coeffs: one 2D ndarray (reused for all four indices) or an
            indexable object providing four coefficient sets at positions
            0, 1, 2 and 3.
        compact: passed through to ``_conc_mos`` for the (ij) and (kl) pairs.

    Returns:
        ndarray of shape (nij_pair, nkl_pair) holding the accumulated
        ``Lij.T`` x ``Lkl`` products.
    '''
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        # Same orbitals on every index.
        mo_coeffs = (mo_coeffs,) * 4

    ijmosym, nij_pair, moij, ijslice = _conc_mos(
        mo_coeffs[0], mo_coeffs[1], compact)
    klmosym, nkl_pair, mokl, klslice = _conc_mos(
        mo_coeffs[2], mo_coeffs[3], compact)
    mo_eri = numpy.zeros((nij_pair, nkl_pair))

    same_pairs = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
                  iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

    half_ij = half_kl = None
    for ao_block in self.loop():
        half_ij = _ao2mo.nr_e2(ao_block, moij, ijslice, aosym='s2',
                               mosym=ijmosym, out=half_ij)
        # Reuse the (ij) half-transformed block when (kl) would be identical.
        half_kl = half_ij if same_pairs else _ao2mo.nr_e2(
            ao_block, mokl, klslice, aosym='s2', mosym=klmosym, out=half_kl)
        lib.dot(half_ij.T, half_kl, 1, mo_eri, 1)
    return mo_eri
def get_int3c_mo(mol, auxmol, mo_coeff,
                 compact=getattr(__config__, 'df_df_DF_ao2mo_compact', True),
                 max_memory=None):
    '''Evaluate (P|uv) c_ui c_vj -> (P|ij)

    Args:
        mol: gto.Mole
        auxmol: gto.Mole, contains auxbasis
        mo_coeff: ndarray, list, or tuple containing MO coefficients.
            A single 2D ndarray is used for both AO dimensions; otherwise
            mo_coeff[0] and mo_coeff[1] are used for the first and second
            AO dimension respectively.

    Kwargs:
        compact: bool
            Forwarded to ``_conc_mos``. If true, only unique ERIs along the
            two MO dimensions are returned. Does nothing if mo_coeff contains
            two different sets of orbitals.
        max_memory: int
            Maximum memory consumption in MB. Defaults to ``mol.max_memory``.

    Returns:
        int3c: ndarray of shape (naux, nmo0, nmo1) or (naux, nmo*(nmo+1)//2)
    '''
    nao = mol.nao
    naux = auxmol.nao
    nbas = mol.nbas
    npair = nao * (nao + 1) // 2
    if max_memory is None:
        max_memory = mol.max_memory

    # Pick the orbital set for each AO dimension
    shared_set = isinstance(mo_coeff, np.ndarray) and mo_coeff.ndim == 2
    if shared_set:
        mo0 = mo1 = mo_coeff
    else:
        mo0 = mo_coeff[0]
        mo1 = mo_coeff[1]
    nmo0 = mo0.shape[-1]
    nmo1 = mo1.shape[-1]
    mosym, nmo_pair, mo_conc, mo_slice = _conc_mos(mo0, mo1, compact=compact)

    # (P|uv) -> (P|ij)
    get_int3c = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    int3c = np.zeros((naux, nmo_pair), dtype=mo0.dtype)

    # Block size over auxiliary functions: derived from the memory still
    # available, clamped to the range [20, 240].
    avail_memory = max_memory - lib.current_memory()[0]
    blksize = int(min(max(avail_memory * 1e6 / 8 / (npair * 2), 20), 240))
    aux_loc = auxmol.ao_loc
    for shl0, shl1, nL in balance_partition(aux_loc, blksize):
        p0 = aux_loc[shl0]
        p1 = aux_loc[shl1]
        # (uv|P); apparently stored f-contiguous but already in the memory
        # order needed here, so a plain transpose is enough.
        ao_block = get_int3c((0, nbas, 0, nbas, shl0, shl1)).T
        int3c[p0:p1] = _ao2mo.nr_e2(ao_block, mo_conc, mo_slice, aosym='s2',
                                    mosym=mosym, out=int3c[p0:p1])
        ao_block = None

    # Unpack to three indices when no pair symmetry was applied
    if 's1' in mosym:
        return int3c.reshape(naux, nmo0, nmo1)
    return int3c
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform integrals to the MO basis for every (ki, kj, kk) triple.

    Each block combines a contribution built from ``mydf.ft_loop`` (staged in
    a temporary HDF5 file) with a contribution built from ``mydf.sr_loop``.

    Args:
        mydf: object providing ``cell``, ``kpts``, ``mesh``, ``max_memory``,
            ``blockdim``, ``weighted_coulG``, ``ft_loop`` and ``sr_loop``.
        mo_coeff_kpts: either one 3D ndarray (reused for all four indices) or
            an iterable of four arrays; axis 2 of each is the orbital count.
        kpts: k-points; defaults to ``mydf.kpts``.
        factor: scales ``coulG`` and the ``sr_loop`` contributions.
        out: optional output array; must have shape ``eri_shape``.

    Returns:
        ``out``, of shape (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol).
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    if isinstance(mo_coeff_kpts, numpy.ndarray) and mo_coeff_kpts.ndim == 3:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert (out.shape == eri_shape)

    # All ordered (ki, kj) pairs, grouped below by their difference kj - ki.
    kptij_lst = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kptis_lst = kptij_lst[:, 0]
    kptjs_lst = kptij_lst[:, 1]
    kpt_ji = kptjs_lst - kptis_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    ngrids = numpy.prod(mydf.mesh)

    nao = cell.nao_nr()
    max_memory = max(
        2000, mydf.max_memory - lib.current_memory()[0] - nao**4 * 16 / 1e6) * .5

    # Temporary HDF5 file holding the half-transformed 'zij/*' and 'zkl/*' data.
    fswap = lib.H5TmpFile()
    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id, kpt in enumerate(uniq_kpts):
        q = uniq_kpts[uniq_id]
        # Flat indices (ki * nkpts + kj) of the pairs that share this q.
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_id)[0]
        kptjs = kptjs_lst[adapted_ji_idx]
        coulG = mydf.weighted_coulG(q, False, mydf.mesh)
        coulG *= factor

        moij_list = []
        ijslice_list = []
        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts
            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            moij_list.append(moij)
            ijslice_list.append(ijslice)
            fswap.create_dataset('zij/' + str(ji), (ngrids, nmoi * nmoj), 'D')

        # Grid rows p0:p1 of the (ij) half, weighted by coulG.
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, kptjs, max_memory=max_memory):
            for ji, aoao in enumerate(aoaoks):
                ki = adapted_ji_idx[ji] // nkpts
                kj = adapted_ji_idx[ji] % nkpts
                buf = aoao.transpose(1, 2, 0).reshape(nao**2, p1 - p0)
                zij = _ao2mo.r_e2(lib.transpose(buf), moij_list[ji], ijslice_list[ji], tao, ao_loc)
                zij *= coulG[p0:p1, None]
                fswap['zij/' + str(ji)][p0:p1] = zij

        mokl_list = []
        klslice_list = []
        for kk in range(nkpts):
            # NOTE: ki, kj here are whatever the loops above left behind
            # (the last pair of this q group).
            kl = kconserv[ki, kj, kk]
            mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk], mo_coeff_kpts[3][kl])[2:]
            mokl_list.append(mokl)
            klslice_list.append(klslice)
            fswap.create_dataset('zkl/' + str(kk), (ngrids, nmok * nmol), 'D')

        # The l k-points are taken from the first pair of this q group.
        ki = adapted_ji_idx[0] // nkpts
        kj = adapted_ji_idx[0] % nkpts
        kptls = kpts[kconserv[ki, kj, :]]
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, -kptls, max_memory=max_memory):
            for kk, aoao in enumerate(aoaoks):
                buf = aoao.conj().transpose(1, 2, 0).reshape(nao**2, p1 - p0)
                zkl = _ao2mo.r_e2(lib.transpose(buf), mokl_list[kk], klslice_list[kk], tao, ao_loc)
                fswap['zkl/' + str(kk)][p0:p1] = zkl

        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts

            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            zij = []
            for LpqR, LpqI, sign in mydf.sr_loop(kpts[[ki, kj]], max_memory, False, mydf.blockdim):
                zij.append(
                    _ao2mo.r_e2(LpqR + LpqI * 1j, moij, ijslice, tao, ao_loc))

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                # Start from the staged ft_loop contribution ...
                eri_mo = lib.dot(
                    numpy.asarray(fswap['zij/' + str(ji)]).T,
                    numpy.asarray(fswap['zkl/' + str(kk)]))
                # ... then add the sr_loop blocks, scaled by sign * factor.
                for i, (LrsR, LrsI, sign) in \
                        enumerate(mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)):
                    zkl = _ao2mo.r_e2(LrsR + LrsI * 1j, mokl_list[kk], klslice_list[kk], tao, ao_loc)
                    lib.dot(zij[i].T, zkl, sign * factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki, kj, kk] = eri_mo.reshape(eri_shape[3:])
        # Drop the staged data before handling the next q.
        del (fswap['zij'])
        del (fswap['zkl'])

    return out
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform the integrals yielded by ``mydf.pw_loop`` to the MO basis.

    Args:
        mydf: object providing ``gs``, ``max_memory``, ``weighted_coulG`` and
            ``pw_loop``.
        mo_coeffs: one 2D ndarray (reused for all four indices) or a sequence
            of four coefficient arrays for the i, j, k and l indices.
        kpts: passed to ``_format_kpts`` to obtain (kpti, kptj, kptk, kptl).
        compact: forwarded to ``_conc_mos`` in the gamma-point branch only;
            the complex branches do not use it.

    Returns:
        2D ndarray. Real, shape (nij_pair, nkl_pair), when all k-points are
        the gamma point and all orbitals are real; complex128 otherwise.
    '''
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs, ) * 4
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            # sqrt(coulG) is applied to the shared AO block, so the product
            # of the two transformed halves carries the full coulG weight.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # Real and imaginary parts are transformed and accumulated
            # separately.
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI, 1, eri_mo, 1)
            pqkR = pqkI = None  # release the AO block
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti - kptl) and is_zero(kptj - kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex128 is used explicitly: the old alias ``numpy.complex``
        # no longer exists in current NumPy releases.
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            buf *= numpy.sqrt(coulG[p0:p1]).reshape(-1, 1)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            pqkR = pqkI = None
        # The second pair was built in (l, k) order; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        # The two pw_loop generators are advanced in lock-step, each with
        # half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mydf.gs,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR - rskI * 1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1].reshape(-1, 1)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform the integrals yielded by ``mydf.pw_loop`` to the MO basis.

    Args:
        mydf: object providing ``gs``, ``max_memory``, ``weighted_coulG`` and
            ``pw_loop``.
        mo_coeffs: one 2D ndarray (reused for all four indices) or a sequence
            of four coefficient arrays for the i, j, k and l indices.
        kpts: passed to ``_format_kpts`` to obtain (kpti, kptj, kptk, kptl).
        compact: forwarded to ``_conc_mos`` in the gamma-point branch only;
            the complex branches do not use it.

    Returns:
        2D ndarray. Real, shape (nij_pair, nkl_pair), when the k-points sum
        to (numerically) zero and all orbitals are real; complex128 otherwise.
    '''
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        coulG = mydf.weighted_coulG(kptj-kpti, False, mydf.gs)
        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], max_memory=max_memory,
                                aosym='s2'):
            # sqrt(coulG) is applied to the shared AO block, so the product
            # of the two transformed halves carries the full coulG weight.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # Real and imaginary parts are transformed and accumulated
            # separately.
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI, 1, eri_mo, 1)
            pqkR = pqkI = None  # release the AO block
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex128 is used explicitly: the old alias ``numpy.complex``
        # no longer exists in current NumPy releases.
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        coulG = mydf.weighted_coulG(kptj-kpti, False, mydf.gs)
        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], max_memory=max_memory):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            buf *= numpy.sqrt(coulG[p0:p1]).reshape(-1,1)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            pqkR = pqkI = None
        # The second pair was built in (l, k) order; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        coulG = mydf.weighted_coulG(kptj-kpti, False, mydf.gs)
        zij = zkl = buf = None
        # The two pw_loop generators are advanced in lock-step, each with
        # half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mydf.gs, kptijkl[:2], max_memory=max_memory*.5),
                         mydf.pw_loop(mydf.gs,-kptijkl[2:], max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR-rskI*1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1].reshape(-1,1)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform the ``mydf.sr_loop`` integrals to the MO basis for every
    (ki, kj, kk) triple of k-points.

    Args:
        mydf: object providing ``cell``, ``kpts``, ``max_memory``,
            ``blockdim`` and ``sr_loop``.
        mo_coeff_kpts: either one 3D ndarray (reused for all four indices) or
            an iterable of four arrays; axis 2 of each is the orbital count.
        kpts: k-points; defaults to ``mydf.kpts``.
        factor: multiplies every accumulated block contribution.
        out: optional output array; must have shape ``eri_shape``.

    Returns:
        ``out``, of shape (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol).
    '''
    cell = mydf.cell
    kpts = mydf.kpts if kpts is None else kpts
    nkpts = len(kpts)

    shared_orbitals = (isinstance(mo_coeff_kpts, numpy.ndarray) and
                       mo_coeff_kpts.ndim == 3)
    if shared_orbitals:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert(out.shape == eri_shape)

    # Group all ordered (ki, kj) pairs by their difference kj - ki.
    kpt_pairs = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kpt_ji = kpt_pairs[:,1] - kpt_pairs[:,0]
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    nao = cell.nao_nr()
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6) * .5

    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id in range(len(uniq_kpts)):
        # Flat pair index is ki * nkpts + kj.
        for ji_idx in numpy.where(uniq_inverse == uniq_id)[0]:
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts

            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            # Half-transformed (ij) blocks, one per block yielded by sr_loop.
            zij = [
                _ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc)
                for LpqR, LpqI, _sign
                in mydf.sr_loop(kpts[[ki,kj]], max_memory, False, mydf.blockdim)
            ]

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk], mo_coeff_kpts[3][kl])[2:]
                eri_mo = numpy.zeros((nmoi*nmoj,nmok*nmol), dtype=numpy.complex128)
                kl_blocks = mydf.sr_loop(kpts[[kk,kl]], max_memory, False,
                                         mydf.blockdim)
                for blk, (LrsR, LrsI, sign) in enumerate(kl_blocks):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl, klslice, tao, ao_loc)
                    lib.dot(zij[blk].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])
    return out
def get_WmnI_diag(gw, orbs, kptlist, freqs, max_memory=8000):
    '''
    Compute the Wmn intermediate (for the orbitals in ``orbs``) in MO basis
    at the given frequencies, plus optional finite-size correction terms.

    Args:
        gw: GW object; reads ``_scf``, ``nocc``, ``nmo``, ``nkpts``, ``kpts``,
            ``with_df``, ``mol`` and the flag ``fc``.
        orbs: orbital indices selected on the last MO axis of Lij.
        kptlist: indices of the k-points to evaluate.
        freqs: frequencies; one slice of the output per entry.
        max_memory: not used in this function body (``sr_loop`` is given
            ``0.1 * gw._scf.max_memory`` instead).

    Returns:
        Wmn: complex ndarray of shape (nkpts, nklist, nmo, norbs, nw)
        Del_00: complex ndarray of shape (nw,) if ``gw.fc`` else None
        Del_P0: complex ndarray of shape (nklist, norbs, nw) if ``gw.fc``
            else None
        qij: result of ``get_qij`` if ``gw.fc`` else None
        q_abs: result of ``gw.mol.get_abs_kpts`` if ``gw.fc`` else None
    '''
    mo_energy = np.array(gw._scf.mo_energy)
    mo_coeff = np.array(gw._scf.mo_coeff)
    nocc = gw.nocc
    nmo = gw.nmo
    nkpts = gw.nkpts
    kpts = gw.kpts
    nklist = len(kptlist)
    nw = len(freqs)
    norbs = len(orbs)
    mydf = gw.with_df

    # possible kpts shift center
    kscaled = gw.mol.get_scaled_kpts(kpts)
    kscaled -= kscaled[0]

    Del_00, Del_P0, qij, q_abs = None, None, None, None
    if gw.fc:
        # Set up q mesh for q->0 finite size correction
        q_pts = np.array([1e-3, 0, 0]).reshape(1, 3)
        nq_pts = len(q_pts)
        q_abs = gw.mol.get_abs_kpts(q_pts)

        # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir)
        qij = get_qij(gw, q_abs[0], mo_coeff)

    Wmn = np.zeros((nkpts, nklist, nmo, norbs, nw), dtype=np.complex128)
    if gw.fc:
        Del_P0 = np.zeros((nklist, norbs, nw), dtype=np.complex128)
        Del_00 = np.zeros(nw, dtype=np.complex128)

    for kL in range(nkpts):
        # Lij: (ki, L, i, j) for looping every kL
        Lij = []
        # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G)
        # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G)
        kidx = np.zeros((nkpts), dtype=np.int64)
        kidx_r = np.zeros((nkpts), dtype=np.int64)
        for i, kpti in enumerate(kpts):
            for j, kptj in enumerate(kpts):
                # Find (ki,kj) that satisfies momentum conservation with kL
                kconserv = -kscaled[i] + kscaled[j] + kscaled[kL]
                is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12
                if is_kconserv:
                    kidx[i] = j
                    kidx_r[j] = i
                    logger.debug(
                        gw, "Read Lpq (kL: %s / %s, ki: %s, kj: %s)" %
                        (kL + 1, nkpts, i, j))
                    Lij_out = None
                    # Read (L|pq) and ao2mo transform to (L|ij)
                    Lpq = []
                    for LpqR, LpqI, sign in mydf.sr_loop([kpti, kptj],
                                                         max_memory=0.1 * gw._scf.max_memory,
                                                         compact=False):
                        Lpq.append(LpqR + LpqI * 1.0j)
                    # support unequal naux on different k points
                    Lpq = np.vstack(Lpq).reshape(-1, nmo**2)
                    tao = []
                    ao_loc = None
                    moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:]
                    Lij_out = _ao2mo.r_e2(Lpq, moij, ijslice, tao, ao_loc, out=Lij_out)
                    Lij.append(Lij_out.reshape(-1, nmo, nmo))
        Lij = np.asarray(Lij)
        naux = Lij.shape[1]

        if kL == 0:
            # Only kL == 0 receives the head/wing corrections.
            for w in range(nw):
                # body dielectric matrix eps_body
                Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx)
                eps_body_inv = np.linalg.inv(np.eye(naux) - Pi)

                if gw.fc:
                    # head dielectric matrix eps_00
                    Pi_00 = get_rho_response_head(gw, freqs[w], mo_energy, qij)
                    eps_00 = 1. - 4. * np.pi / np.linalg.norm(
                        q_abs[0])**2 * Pi_00

                    # wings dielectric matrix eps_P0
                    Pi_P0 = get_rho_response_wing(gw, freqs[w], mo_energy, Lij, qij)
                    eps_P0 = -np.sqrt(4. * np.pi) / np.linalg.norm(
                        q_abs[0]) * Pi_P0

                    # inverse dielectric matrix
                    eps_inv_00 = 1. / (eps_00 - np.dot(
                        np.dot(eps_P0.conj(), eps_body_inv), eps_P0))
                    eps_inv_P0 = -eps_inv_00 * np.dot(eps_body_inv, eps_P0)

                    # head correction
                    Del_00[w] = 2. / np.pi * (6. * np.pi**2 / gw.mol.vol /
                                              nkpts)**(1. / 3.) * (eps_inv_00 - 1.)
                    wings_const = np.sqrt(gw.mol.vol / 4. / np.pi**3) * (
                        6. * np.pi**2 / gw.mol.vol / nkpts)**(2. / 3.)

                eps_inv_PQ = eps_body_inv
                for k in range(nklist):
                    kn = kptlist[k]
                    # Find km that conserves with kn and kL (-km+kn+kL=G)
                    km = kidx_r[kn]
                    Qmn = einsum('Pmn,PQ->Qmn', Lij[km][:, :, orbs].conj(),
                                 eps_inv_PQ - np.eye(naux))
                    Wmn[km, k, :, :, w] = 1. / nkpts * einsum(
                        'Qmn,Qmn->mn', Qmn, Lij[km][:, :, orbs])

                    if gw.fc:
                        # compute wing correction
                        Wn_P0 = einsum('Pnm,P->nm', Lij[kn], eps_inv_P0).diagonal()
                        Wn_P0 = Wn_P0.real * 2.
                        Del_P0[k, :, w] = wings_const * Wn_P0[orbs]
        else:
            for w in range(nw):
                Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx)
                # (1 - Pi)^-1 - 1
                Pi_inv = np.linalg.inv(np.eye(naux) - Pi) - np.eye(naux)
                for k in range(nklist):
                    kn = kptlist[k]
                    # Find km that conserves with kn and kL (-km+kn+kL=G)
                    km = kidx_r[kn]
                    Qmn = einsum('Pmn,PQ->Qmn', Lij[km][:, :, orbs].conj(), Pi_inv)
                    Wmn[km, k, :, :, w] = 1. / nkpts * einsum(
                        'Qmn,Qmn->mn', Qmn, Lij[km][:, :, orbs])

    return Wmn, Del_00, Del_P0, qij, q_abs
def half_e1(mol, mo_coeffs, swapfile,
            intor='int2e', aosym='s4', comp=1,
            max_memory=MAX_MEMORY, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : sequence of ndarray
            Only mo_coeffs[0] and mo_coeffs[1] are used for the transformation;
            every entry is checked for complex128 dtype.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The transformed
            integrals are saved in blocks.  An h5py Group is used directly;
            anything else is passed to ``lib.H5TmpFile``.

    Kwargs
        intor : str
            Name of the 2-electron integral.  Ref to
            :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. int2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve performance

    Returns:
        The ``swapfile`` argument, unchanged.

    Raises:
        NotImplementedError: if any array in mo_coeffs has dtype complex128.
    '''
    if any(c.dtype == numpy.complex128 for c in mo_coeffs):
        raise NotImplementedError('Integral transformation for complex orbitals')

    intor = mol._add_suffix(intor)
    time0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mol, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    # Number of AO pairs stored per row: packed triangle when i/j symmetric.
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = \
            incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                             compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    ioblk_size = ioblk_words * 8/1e6  # words (8 bytes each) -> MB
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = max(int((mem_words - 2*comp*e1buflen*nij_pair) // (nao_pair*comp)),
                   IOBUF_ROW_MIN)
    ao_loc = mol.ao_loc_nr('_cart' in intor)
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')), e1buflen,
                                  aobuflen, ao_loc)
    if ao2mopt is None:
        if intor == 'int2e_cart' or intor == 'int2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    if isinstance(swapfile, h5py.Group):
        fswap = swapfile
    else:
        fswap = lib.H5TmpFile(swapfile)
    for icomp in range(comp):
        fswap.create_group(str(icomp)) # for h5py old version

    log.debug('step1: tmpfile %s %.8g MB', fswap.filename, nij_pair*nao_pair*8/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)

    nstep = len(shranges)
    # Shrink the buffer length to the largest shell range actually produced.
    e1buflen = max([x[2] for x in shranges])

    e2buflen, chunks = guess_e2bufsize(ioblk_size, nij_pair, e1buflen)

    def save(istep, iobuf):
        # Write each component of this step to the dataset '<icomp>/<istep>'.
        for icomp in range(comp):
            _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                              e2buflen, None)

    # transform e1
    ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
    with lib.call_in_background(save) as async_write:
        buf1 = numpy.empty((comp*e1buflen,nao_pair))
        buf2 = numpy.empty((comp*e1buflen,nij_pair))
        buf_write = numpy.empty_like(buf2)
        fill = _ao2mo.nr_e1fill
        f_e1 = _ao2mo.nr_e1
        for istep,sh_range in enumerate(shranges):
            log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                       istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            # View on buf2; no new allocation per step.
            iobuf = numpy.ndarray((comp,buflen,nij_pair), buffer=buf2)
            nmic = len(sh_range[3])
            p1 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug2(' fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                           imic+1, nmic, *aoshs)
                buf = fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                           aosym, comp, ao2mopt, out=buf1).reshape(-1,nao_pair)
                buf = f_e1(buf, moij, ijshape, aosym, ijmosym)
                p0, p1 = p1, p1 + aoshs[2]
                iobuf[:,p0:p1] = buf.reshape(comp,aoshs[2],nij_pair)
            ti0 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            async_write(istep, iobuf)
            # Swap buffers so the next step fills the other array while this
            # one is being handed to save().
            buf2, buf_write = buf_write, buf2

    fswap = None
    return swapfile
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform the integrals yielded by ``mydf.sr_loop`` to the MO basis.

    ``mydf.build()`` is called first if ``mydf._cderi`` is None.

    Args:
        mydf: object providing ``_cderi``, ``build``, ``max_memory`` and
            ``sr_loop``.
        mo_coeffs: one 2D ndarray (reused for all four indices) or a sequence
            of four coefficient arrays for the i, j, k and l indices.
        kpts: passed to ``_format_kpts`` to obtain (kpti, kptj, kptk, kptl).
        compact: forwarded to ``_conc_mos`` in the gamma-point branch only;
            the complex branches do not use it.

    Returns:
        2D ndarray. Real, shape (nij_pair, nkl_pair), when the k-points sum
        to (numerically) zero and all orbitals are real; complex128 otherwise.
    '''
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * 0.5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and iden_coeffs(mo_coeffs[1], mo_coeffs[3])
        ijR = klR = None
        # Only the real part of each block is used in this branch.
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None  # release the AO block
        return eri_mo

    elif (abs(kpti - kptk).sum() < KPT_DIFF_TOL) and (abs(kptj - kptl).sum() < KPT_DIFF_TOL):
        # kpti == kptk and kptj == kptl: one sr_loop serves both halves.
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex128 is used explicitly: the old alias ``numpy.complex``
        # no longer exists in current NumPy releases.
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and iden_coeffs(mo_coeffs[1], mo_coeffs[3])

        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif (abs(kpti - kptl).sum() < KPT_DIFF_TOL) and (abs(kptj - kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and iden_coeffs(mo_coeffs[1], mo_coeffs[2])

        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The second pair was built in (l, k) order; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)

        zij = zkl = None
        # The two sr_loop generators are advanced in lock-step.
        for (LpqR, LpqI), (LrsR, LrsI) in lib.izip(
                mydf.sr_loop(kptijkl[:2], max_memory, False),
                mydf.sr_loop(kptijkl[2:], max_memory, False)
        ):
            zij, zkl = _ztrans(LpqR + LpqI * 1j, zij, moij, ijslice,
                               LrsR + LrsI * 1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform integrals to the MO basis for every (ki, kj, kk) triple.

    Each block combines a contribution built from ``mydf.ft_loop`` (staged in
    a temporary HDF5 file) with a contribution built from ``mydf.sr_loop``.

    Args:
        mydf: object providing ``cell``, ``kpts``, ``mesh``, ``max_memory``,
            ``blockdim``, ``weighted_coulG``, ``ft_loop`` and ``sr_loop``.
        mo_coeff_kpts: either one 3D ndarray (reused for all four indices) or
            an iterable of four arrays; axis 2 of each is the orbital count.
        kpts: k-points; defaults to ``mydf.kpts``.
        factor: scales ``coulG`` and the ``sr_loop`` contributions.
        out: optional output array; must have shape ``eri_shape``.

    Returns:
        ``out``, of shape (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol).
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    if isinstance(mo_coeff_kpts, numpy.ndarray) and mo_coeff_kpts.ndim == 3:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert(out.shape == eri_shape)

    # All ordered (ki, kj) pairs, grouped below by their difference kj - ki.
    kptij_lst = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kptis_lst = kptij_lst[:,0]
    kptjs_lst = kptij_lst[:,1]
    kpt_ji = kptjs_lst - kptis_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    ngrids = numpy.prod(mydf.mesh)

    nao = cell.nao_nr()
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6) * .5

    # Temporary HDF5 file holding the half-transformed 'zij/*' and 'zkl/*' data.
    fswap = lib.H5TmpFile()
    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id, kpt in enumerate(uniq_kpts):
        q = uniq_kpts[uniq_id]
        # Flat indices (ki * nkpts + kj) of the pairs that share this q.
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_id)[0]
        kptjs = kptjs_lst[adapted_ji_idx]
        coulG = mydf.weighted_coulG(q, False, mydf.mesh)
        coulG *= factor

        moij_list = []
        ijslice_list = []
        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts
            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            moij_list.append(moij)
            ijslice_list.append(ijslice)
            fswap.create_dataset('zij/'+str(ji), (ngrids,nmoi*nmoj), 'D')

        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, kptjs):
            for ji, aoao in enumerate(aoaoks):
                ki = adapted_ji_idx[ji] // nkpts
                kj = adapted_ji_idx[ji] % nkpts
                # Each yielded block only covers grid rows p0:p1, so its
                # length is p1-p0 (which equals ngrids only when the loop
                # yields a single block).
                buf = aoao.transpose(1,2,0).reshape(nao**2,p1-p0)
                zij = _ao2mo.r_e2(lib.transpose(buf), moij_list[ji],
                                  ijslice_list[ji], tao, ao_loc)
                zij *= coulG[p0:p1,None]
                fswap['zij/'+str(ji)][p0:p1] = zij

        mokl_list = []
        klslice_list = []
        for kk in range(nkpts):
            # NOTE: ki, kj here are whatever the loops above left behind
            # (the last pair of this q group).
            kl = kconserv[ki, kj, kk]
            mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk], mo_coeff_kpts[3][kl])[2:]
            mokl_list.append(mokl)
            klslice_list.append(klslice)
            fswap.create_dataset('zkl/'+str(kk), (ngrids,nmok*nmol), 'D')

        # The l k-points are taken from the first pair of this q group.
        ki = adapted_ji_idx[0] // nkpts
        kj = adapted_ji_idx[0] % nkpts
        kptls = kpts[kconserv[ki, kj, :]]
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, -kptls):
            for kk, aoao in enumerate(aoaoks):
                # Same block-length consideration as for zij above.
                buf = aoao.conj().transpose(1,2,0).reshape(nao**2,p1-p0)
                zkl = _ao2mo.r_e2(lib.transpose(buf), mokl_list[kk],
                                  klslice_list[kk], tao, ao_loc)
                fswap['zkl/'+str(kk)][p0:p1] = zkl

        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts

            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            zij = []
            for LpqR, LpqI, sign in mydf.sr_loop(kpts[[ki,kj]], max_memory, False, mydf.blockdim):
                zij.append(_ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc))

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                # Start from the staged ft_loop contribution ...
                eri_mo = lib.dot(numpy.asarray(fswap['zij/'+str(ji)]).T,
                                 numpy.asarray(fswap['zkl/'+str(kk)]))
                # ... then add the sr_loop blocks, scaled by sign * factor.
                for i, (LrsR, LrsI, sign) in \
                        enumerate(mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl_list[kk],
                                      klslice_list[kk], tao, ao_loc)
                    lib.dot(zij[i].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])
        # Drop the staged data before handling the next q.
        del(fswap['zij'])
        del(fswap['zkl'])

    return out
def half_e1(mol, mo_coeffs, swapfile,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : list of ndarray
            Orbital coefficients; only mo_coeffs[0] and mo_coeffs[1] are
            used here, for the first two indices of (ij|kl).
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The
            transformed integrals are saved in blocks, one dataset per
            (component, step) named '<icomp>/<istep>'.

    Kwargs
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int or logger.Logger
            Print level, or an existing logger to reuse.
        compact : bool
            When compact is True, depending on the four oribital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve perfomance

    Returns:
        The ``swapfile`` argument, unchanged.  When a file name was given,
        the file opened here is closed before returning.
    '''
    # time.clock was removed in Python 3.8; fall back to it only on
    # interpreters that lack time.process_time.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = \
            incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                             compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = int((mem_words - iobuf_words) // (nao_pair*comp))
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')), e1buflen, aobuflen)
    if ao2mopt is None:
        if intor == 'cint2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    opened_here = isinstance(swapfile, str)
    if opened_here:
        fswap = h5py.File(swapfile, 'w')
    else:
        fswap = swapfile

    try:
        for icomp in range(comp):
            fswap.create_group(str(icomp))  # for h5py old version

        log.debug('step1: tmpfile %s  %.8g MB', fswap.filename, nij_pair*nao_pair*8/1e6)
        log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
                  nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)

        # transform e1
        ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
        nstep = len(shranges)
        maxbuflen = max(x[2] for x in shranges)
        bufs1 = numpy.empty((comp*maxbuflen,nao_pair))
        bufs2 = numpy.empty((comp*maxbuflen,nij_pair))
        for istep, sh_range in enumerate(shranges):
            log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                       istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            iobuf = bufs2[:comp*buflen].reshape(comp,buflen,nij_pair)
            nmic = len(sh_range[3])
            p0 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug2(' fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                           imic+1, nmic, *aoshs)
                buf = bufs1[:comp*aoshs[2]]  # (@)
                _ao2mo.nr_e1fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                                 aosym, comp, ao2mopt, out=buf)
                buf = _ao2mo.nr_e1(buf, moij, ijshape, aosym, ijmosym)
                iobuf[:,p0:p0+aoshs[2]] = buf.reshape(comp,aoshs[2],-1)
                p0 += aoshs[2]
            ti2 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            e2buflen = guess_e2bufsize(ioblk_size, nij_pair, buflen)[0]
            for icomp in range(comp):
                _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                                  e2buflen, None)
            ti0 = log.timer_debug1('transposing to disk', *ti2)
        bufs1 = bufs2 = None
    finally:
        # Only close what this function opened, and do so even on failure.
        if opened_here:
            fswap.close()
    return swapfile
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Build the MO-basis integrals for every (ki, kj, kk) k-point triple
    from the tensors yielded by ``mydf.sr_loop``; kl is looked up in the
    table returned by ``kpts_helper.get_kconserv``.

    Args:
        mydf : object providing ``cell``, ``kpts``, ``mesh``, ``max_memory``,
            ``blockdim`` and ``sr_loop``.
        mo_coeff_kpts : a 3-dimensional ndarray (shared by all four indices)
            or a sequence of four such arrays, indexed by k-point first.
            Orbital shapes may differ between k-points, so the coefficients
            must be padded by zeros to a common shape beforehand, e.g. with
            pbc.mp.kmp2.padded_mo_coeff.

    Kwargs:
        kpts : k-points; ``mydf.kpts`` when omitted.
        factor : scalar multiplying each accumulated product.
        out : optional array of shape
            (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol) to write into.

    Returns:
        The filled ``out`` array.
    '''
    cell = mydf.cell
    kpts = mydf.kpts if kpts is None else kpts
    nkpts = len(kpts)

    shared_coeff = (isinstance(mo_coeff_kpts, numpy.ndarray)
                    and mo_coeff_kpts.ndim == 3)
    mo_coeff_kpts = [mo_coeff_kpts] * 4 if shared_coeff else list(mo_coeff_kpts)

    nmoi, nmoj, nmok, nmol = (c.shape[2] for c in mo_coeff_kpts)
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    dtype = (numpy.result_type(*mo_coeff_kpts) if gamma_point(kpts)
             else numpy.complex128)

    if out is not None:
        assert(out.shape == eri_shape)
    else:
        out = numpy.empty(eri_shape, dtype=dtype)

    # Flattened (ki, kj) pairs, index = ki * nkpts + kj, grouped by kj - ki.
    pair_kpts = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kpt_ji = pair_kpts[:,1] - pair_kpts[:,0]
    uniq_kpts, _, uniq_inverse = unique(kpt_ji)
    # Not used below; evaluated only to keep the original access to mydf.mesh.
    ngrids = numpy.prod(mydf.mesh)
    nao = cell.nao_nr()
    mem_avail = mydf.max_memory - lib.current_memory()[0] - nao**4*16/1e6
    max_memory = max(2000, mem_avail) * .5

    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)

    for uniq_id in range(len(uniq_kpts)):
        for pair_idx in numpy.where(uniq_inverse == uniq_id)[0]:
            ki, kj = divmod(pair_idx, nkpts)

            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki],
                                      mo_coeff_kpts[1][kj])[2:]
            # Half-transformed (ij) blocks, one per block from sr_loop.
            zij = [_ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc)
                   for LpqR, LpqI, sign
                   in mydf.sr_loop(kpts[[ki,kj]], max_memory, False, mydf.blockdim)]

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk],
                                          mo_coeff_kpts[3][kl])[2:]
                eri_mo = numpy.zeros((nmoi*nmoj,nmok*nmol), dtype=numpy.complex128)
                kl_blocks = mydf.sr_loop(kpts[[kk,kl]], max_memory, False,
                                         mydf.blockdim)
                # The i-th (kl) block is contracted with the i-th (ij) block.
                for blk, (LrsR, LrsI, sign) in enumerate(kl_blocks):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl, klslice, tao, ao_loc)
                    lib.dot(zij[blk].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])
    return out
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Transform the integrals (ij|kl) to the MO basis for one set of four
    k-points, contracting the tensors yielded by ``mydf.sr_loop``.

    Args:
        mydf : density-fitting object; ``mydf.build()`` is called first when
            ``mydf._cderi`` is None.
        mo_coeffs : a 2-dimensional ndarray (used for all four indices) or a
            sequence of four coefficient arrays for i, j, k, l.

    Kwargs:
        kpts : k-points for the four indices, normalised by ``_format_kpts``.
        compact : passed to ``_conc_mos`` in the real gamma-point branch
            only; the complex branches call ``_conc_mos`` without it.

    Returns:
        A 2-dimensional array of shape (nij_pair, nkl_pair).  If
        ``_iskconserv`` reports that the k-points do not conserve momentum,
        a warning is logged and a zero array with one axis per orbital set
        (the four ``mo.shape[1]`` values) is returned instead.
    '''
    warn_pbc2d_eri(mydf)
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        # When (i,j) and (k,l) use identical coefficients the flag lets
        # _dtrans know both transformations share the same inputs.
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, sign, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    # (kpt) i == k && j == l
    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex128 is what the removed alias numpy.complex meant.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The second pair was built in (l,k) order; swap to (k,l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        nao = mo_coeffs[0].shape[0]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        # One block size for both loops so they advance in lock-step.
        blksize = int(min(max_memory*.3e6/16/nij_pair,
                          max_memory*.3e6/16/nkl_pair,
                          max_memory*.3e6/16/nao**2))
        zij = zkl = None
        for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Transform the integrals (ij|kl) to the MO basis for one set of four
    k-points, accumulating over the grid blocks yielded by ``mydf.pw_loop``
    and weighting them with ``mydf.weighted_coulG``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        mo_coeffs : a 2-dimensional ndarray (used for all four indices) or a
            sequence of four coefficient arrays for i, j, k, l.

    Kwargs:
        kpts : k-points for the four indices, normalised by ``_format_kpts``.
        compact : passed to ``_conc_mos`` in the real gamma-point branch
            only; the complex branches call ``_conc_mos`` without it.

    Returns:
        A 2-dimensional array of shape (nij_pair, nkl_pair).  If
        ``_iskconserv`` reports that the k-points do not conserve momentum,
        a warning is logged and a zero array with one axis per orbital set
        (the four ``mo.shape[1]`` values) is returned instead.
    '''
    warn_pbc2d_eri(mydf)
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            # Real and imaginary parts are transformed separately and both
            # products are added into the real result.
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR*coulG[p0:p1,None], 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex128 is what the removed alias numpy.complex meant.
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj()*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        # The second pair was built in (l,k) order; swap to (k,l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        # Two loops run side by side, each given half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            # Complex conjugate of the second loop's data.
            buf = lib.transpose(rskR-rskI*1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1,None]
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def general(mol, mo_coeffs, erifile, dataname='eri_mo', tmpdir=None,
            intor='int2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True):
    r'''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on the fly.

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By assigning
            different dataname, the existed integral file can be reused.  If
            the erifile contains the dataname, the new integrals data will
            overwrite the old one.
        tmpdir : str
            The directory where to temporarily store the intermediate data
            (the half-transformed integrals).  By default, it's controlled by
            shell environment variable ``TMPDIR``.  The disk space requirement
            is about  comp*mo_coeffs[0].shape[1]*mo_coeffs[1].shape[1]*nao**2
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. int2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int or logger.Logger
            Print level, or an existing logger to reuse.
        compact : bool
            When compact is True, depending on the four oribital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        The ``erifile`` argument.  When a file name was given, the file
        opened here is closed before returning.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf import ao2mo
    >>> import h5py
    >>> def view(h5file, dataname='eri_mo'):
    ...     f5 = h5py.File(h5file, 'r')
    ...     print('dataset %s, shape %s' % (str(f5.keys()), str(f5[dataname].shape)))
    ...     f5.close()
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo4), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 24)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 21)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5', compact=False)
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo2,mo2), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (55, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', dataname='new')
    >>> view('oh2.h5', 'new')
    dataset ['eri_mo', 'new'], shape (55, 55)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='int2e_ip1_sph', aosym='s1', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 100)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='int2e_ip1_sph', aosym='s2kl', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 55)
    '''
    # time.clock was removed in Python 3.8; fall back to it only on
    # interpreters that lack time.process_time.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time_0pass = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2kl'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    if (compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]) and
        aosym in ('s4', 's2ij')):
        nij_pair = nmoi*(nmoi+1) // 2
    else:
        nij_pair = nmoi*nmoj

    klmosym, nkl_pair, mokl, klshape = \
            incore._conc_mos(mo_coeffs[2], mo_coeffs[3],
                             compact and aosym in ('s4', 's2kl'))

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            # Open read/write explicitly: the file is modified below, and
            # the default mode is read-only in h5py >= 3.
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del(feri[dataname])
        else:
            feri = h5py.File(erifile, 'w')
    else:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile

    if comp == 1:
        chunks = (nmoj,nmol)
        shape = (nij_pair,nkl_pair)
    else:
        chunks = (1,nmoj,nmol)
        shape = (comp,nij_pair,nkl_pair)

    if nij_pair == 0 or nkl_pair == 0:
        # Nothing to transform: create the empty dataset without chunking
        # (a chunk shape derived from an empty orbital set is not usable).
        feri.create_dataset(dataname, shape, 'f8')
        if isinstance(erifile, str):
            feri.close()
        return erifile
    h5d_eri = feri.create_dataset(dataname, shape, 'f8', chunks=chunks)

    log.debug('MO integrals %s are saved in %s/%s', intor, erifile, dataname)
    log.debug('num. MO ints = %.8g, required disk %.8g MB',
              float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*8/1e6)

    # transform e1
    if tmpdir is None:
        tmpdir = lib.param.TMPDIR
    # Keep the NamedTemporaryFile object referenced for the whole function;
    # the HDF5 swap file is created at its path.
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    fswap = h5py.File(swapfile.name, 'w')
    half_e1(mol, mo_coeffs, fswap, intor, aosym, comp, max_memory, ioblk_size,
            log, compact)

    time_1pass = log.timer('AO->MO transformation for %s 1 pass'%intor,
                           *time_0pass)

    ioblk_size = max(max_memory*.1, ioblk_size)
    iobuflen = guess_e2bufsize(ioblk_size, nij_pair, max(nao_pair,nkl_pair))[0]
    # Two input buffers: one being transformed, one being filled in the
    # background.
    reading_frame = [numpy.empty((iobuflen,nao_pair)),
                     numpy.empty((iobuflen,nao_pair))]

    def prefetch(icomp, row0, row1, buf):
        # Load the block that follows (icomp, row0:row1) in loop order.
        if icomp+1 < comp:
            icomp += 1
        else:
            row0, row1 = row1, min(nij_pair, row1+iobuflen)
            icomp = 0
        if row0 < row1:
            _load_from_h5g(fswap['%d'%icomp], row0, row1, buf)

    def async_read(icomp, row0, row1, thread_read):
        buf_current, buf_prefetch = reading_frame
        reading_frame[:] = [buf_prefetch, buf_current]
        if thread_read is None:
            # First call: nothing was prefetched yet, read synchronously.
            _load_from_h5g(fswap['%d'%icomp], row0, row1, buf_current)
        else:
            thread_read.join()
        thread_read = lib.background_thread(prefetch, icomp, row0, row1, buf_prefetch)
        return buf_current[:row1-row0], thread_read

    def save(icomp, row0, row1, buf):
        if comp == 1:
            h5d_eri[row0:row1] = buf[:row1-row0]
        else:
            h5d_eri[icomp,row0:row1] = buf[:row1-row0]

    def async_write(icomp, row0, row1, buf, thread_io):
        if thread_io is not None:
            thread_io.join()
        thread_io = lib.background_thread(save, icomp, row0, row1, buf)
        return thread_io

    log.debug('step2: kl-pair (ao %d, mo %d), mem %.8g MB, ioblock %.8g MB',
              nao_pair, nkl_pair, iobuflen*nao_pair*8/1e6,
              iobuflen*nkl_pair*8/1e6)

    ijmoblks = int(numpy.ceil(float(nij_pair)/iobuflen)) * comp
    ao_loc = mol.ao_loc_nr('cart' in intor)
    ti0 = time_1pass
    bufs1 = numpy.empty((iobuflen,nkl_pair))
    buf_write = numpy.empty_like(bufs1)
    istep = 0
    read_handler = write_handler = None
    for row0, row1 in prange(0, nij_pair, iobuflen):
        nrow = row1 - row0

        for icomp in range(comp):
            istep += 1
            log.debug1('step 2 [%d/%d], [%d,%d:%d], row = %d',
                       istep, ijmoblks, icomp, row0, row1, nrow)

            buf, read_handler = async_read(icomp, row0, row1, read_handler)
            _ao2mo.nr_e2(buf, mokl, klshape, aosym, klmosym,
                         ao_loc=ao_loc, out=bufs1)
            write_handler = async_write(icomp, row0, row1, bufs1, write_handler)
            bufs1, buf_write = buf_write, bufs1  # avoid flushing writing buffer

            ti1 = (cpu_clock(), time.time())
            log.debug1('step 2 [%d/%d] CPU time: %9.2f, Wall time: %9.2f',
                       istep, ijmoblks, ti1[0]-ti0[0], ti1[1]-ti0[1])
            ti0 = ti1
    write_handler.join()
    fswap.close()
    if isinstance(erifile, str):
        feri.close()

    log.timer('AO->MO transformation for %s 2 pass'%intor, *time_1pass)
    log.timer('AO->MO transformation for %s '%intor, *time_0pass)
    return erifile
def general(mol, mo_coeffs, erifile, dataname='eri_mo',
            intor='int2e', aosym='s4', comp=None,
            max_memory=MAX_MEMORY, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True):
    r'''Transform AO two-electron integrals to the MO basis for four given
    orbital sets and store the result in an HDF5 dataset.

    The work is done in two passes: :func:`half_e1` writes the half
    transformed integrals to a temporary HDF5 file, then the second half is
    applied block by block while reading and writing run in the background.

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : 4-item list of ndarray
            Orbital coefficients for the four indices of (ij|kl).
        erifile : str or h5py File or h5py Group object
            Where the transformed integrals are stored (HDF5 format).

    Kwargs
        dataname : str
            Name of the dataset inside erifile (ref the hierarchy of HDF5
            format http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).
            Using different names allows an existing file to be reused; when
            erifile is a file name and already holds this dataset, it is
            replaced.
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. int2e_ip_sph has 3 components.
            The value is resolved together with intor by
            ``gto.moleintor._get_intor_and_comp``.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four oribital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        The ``erifile`` argument.  When a file name was given, the file
        opened here is closed before returning.

    Raises:
        NotImplementedError : if any coefficient array has dtype complex128.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf import ao2mo
    >>> import h5py
    >>> def view(h5file, dataname='eri_mo'):
    ...     f5 = h5py.File(h5file, 'r')
    ...     print('dataset %s, shape %s' % (str(f5.keys()), str(f5[dataname].shape)))
    ...     f5.close()
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo4), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 24)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 21)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5', compact=False)
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo2,mo2), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (55, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', dataname='new')
    >>> view('oh2.h5', 'new')
    dataset ['eri_mo', 'new'], shape (55, 55)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='int2e_ip1_sph', aosym='s1', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 100)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='int2e_ip1_sph', aosym='s2kl', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 55)
    '''
    if any(c.dtype == numpy.complex128 for c in mo_coeffs):
        raise NotImplementedError('Integral transformation for complex orbitals')

    time_0pass = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mol, verbose)

    nao, nmoi = mo_coeffs[0].shape[0], mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmol = mo_coeffs[3].shape[1]

    intor, comp = gto.moleintor._get_intor_and_comp(mol._add_suffix(intor), comp)
    assert(nao == mol.nao_nr('_cart' in intor))

    aosym = _stand_sym_code(aosym)
    nao_pair = nao * (nao+1) // 2 if aosym in ('s4', 's2kl') else nao * nao

    ij_packed = (compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]) and
                 aosym in ('s4', 's2ij'))
    nij_pair = nmoi*(nmoi+1) // 2 if ij_packed else nmoi*nmoj

    klmosym, nkl_pair, mokl, klshape = incore._conc_mos(
        mo_coeffs[2], mo_coeffs[3], compact and aosym in ('s4', 's2kl'))

    # Resolve the output container.
    owns_file = isinstance(erifile, str)
    if not owns_file:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile
    elif h5py.is_hdf5(erifile):
        feri = h5py.File(erifile, 'a')
        if dataname in feri:
            del(feri[dataname])
    else:
        feri = h5py.File(erifile, 'w')

    if comp == 1:
        chunks = (nmoj, nmol)
        shape = (nij_pair, nkl_pair)
    else:
        chunks = (1, nmoj, nmol)
        shape = (comp, nij_pair, nkl_pair)

    if nij_pair == 0 or nkl_pair == 0:
        # Empty result: create the dataset without chunking and stop here.
        feri.create_dataset(dataname, shape, 'f8')
        if owns_file:
            feri.close()
        return erifile

    h5d_eri = feri.create_dataset(dataname, shape, 'f8', chunks=chunks)

    log.debug('MO integrals %s are saved in %s/%s', intor, erifile, dataname)
    log.debug('num. MO ints = %.8g, required disk %.8g MB',
              float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*8/1e6)

    # First pass: half transformation into a temporary HDF5 file.
    fswap = lib.H5TmpFile()
    half_e1(mol, mo_coeffs, fswap, intor, aosym, comp, max_memory, ioblk_size,
            log, compact)

    time_1pass = log.timer('AO->MO transformation for %s 1 pass'%intor,
                           *time_0pass)

    ioblk_size = max(max_memory*.1, ioblk_size)
    iobuflen = guess_e2bufsize(ioblk_size, nij_pair, max(nao_pair,nkl_pair))[0]

    def load(icomp, row0, row1, target):
        # Fetch the block that follows (icomp, row0:row1) in loop order.
        if icomp+1 < comp:
            icomp += 1
        else:
            # move to next row-block
            row0, row1 = row1, min(nij_pair, row1+iobuflen)
            icomp = 0
        if row0 < row1:
            _load_from_h5g(fswap['%d'%icomp], row0, row1, target)

    def save(icomp, row0, row1, data):
        nrow = row1 - row0
        if comp == 1:
            h5d_eri[row0:row1] = data[:nrow]
        else:
            h5d_eri[icomp,row0:row1] = data[:nrow]

    # Double buffers for both input and output.
    in_cur = numpy.empty((iobuflen,nao_pair))
    in_next = numpy.empty_like(in_cur)
    out_cur = numpy.empty((iobuflen,nkl_pair))
    out_spare = numpy.empty_like(out_cur)

    log.debug('step2: kl-pair (ao %d, mo %d), mem %.8g MB, ioblock %.8g MB',
              nao_pair, nkl_pair, iobuflen*nao_pair*8/1e6,
              iobuflen*nkl_pair*8/1e6)

    ijmoblks = int(numpy.ceil(float(nij_pair)/iobuflen)) * comp
    ao_loc = mol.ao_loc_nr('_cart' in intor)
    ti0 = time_1pass
    istep = 0
    with lib.call_in_background(load) as prefetch, \
            lib.call_in_background(save) as async_write:
        # Prime the pipeline with the very first block.
        _load_from_h5g(fswap['0'], 0, min(nij_pair, iobuflen), in_next)

        for row0, row1 in prange(0, nij_pair, iobuflen):
            nrow = row1 - row0

            for icomp in range(comp):
                istep += 1
                log.debug1('step 2 [%d/%d], [%d,%d:%d], row = %d',
                           istep, ijmoblks, icomp, row0, row1, nrow)

                # The previously prefetched buffer becomes the working one;
                # the other is handed to the background loader.
                in_cur, in_next = in_next, in_cur
                prefetch(icomp, row0, row1, in_next)
                _ao2mo.nr_e2(in_cur[:nrow], mokl, klshape, aosym, klmosym,
                             ao_loc=ao_loc, out=out_cur)
                async_write(icomp, row0, row1, out_cur)
                out_cur, out_spare = out_spare, out_cur  # avoid flushing writing buffer

                ti1 = (logger.process_clock(), logger.perf_counter())
                log.debug1('step 2 [%d/%d] CPU time: %9.2f, Wall time: %9.2f',
                           istep, ijmoblks, ti1[0]-ti0[0], ti1[1]-ti0[1])
                ti0 = ti1

    fswap = None
    if owns_file:
        feri.close()

    log.timer('AO->MO transformation for %s 2 pass'%intor, *time_1pass)
    log.timer('AO->MO transformation for %s '%intor, *time_0pass)
    return erifile
def general(eri, mo_coeffs, erifile, dataname='eri_mo',
            ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE):
    '''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on disk.

    Args:
        eri : 8-fold reduced eri vector
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By assigning
            different dataname, the existing integral file can be reused.  If
            the erifile contains the dataname, the new integrals data will
            overwrite the old one.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        ``erifile`` as passed in.  When the ij pair dimension is zero an empty
        ndarray of shape (0, nkl_pair) is returned instead and nothing is
        written.

    Pseudocode / algorithm:
        u = mu
        v = nu
        l = lambda
        o = sigma

        Assume eri's are 8-fold reduced.
        nij/nkl_pair = npair or i*j/k*l if only transforming a subset

        First half transform:
            Initialize half_eri of size (nij_pair,npair)
                For lo = 1 -> npair
                    Unpack row lo
                    Unpack row lo to matrix E_{uv}^{lo}
                    Transform C_ui^+*E*C_nj -> E_{ij}^{lo}
                    Ravel or pack E_{ij}^{lo}
                    Save E_{ij}^{lo} -> half_eri[:,lo]

        Second half transform:
            Initialize h5d_eri of size (nij_pair,nkl_pair)
                For ij = 1 -> nij_pair
                    Load and unpack half_eri[ij,:] -> E_{lo}^{ij}
                    Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij}
                    Repack E_{kl}^{ij}
                    Save E_{kl}^{ij} -> h5d_eri[ij,:]

        Each matrix is indexed by the composite index ij x kl, where ij/kl is
        either npair or ixj/kxl, if only a subset of MOs are being transformed.
        Since entire rows or columns need to be read in, the arrays are chunked
        such that IOBLK_SIZE = row/col x chunking col/row. For example, for the
        first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair,
        then load in IOBLK_SIZE/nkl_pair x npair for the second half transform.

        ------ kl ----->
        |jxl
        |
        ij
        |
        |
        v

        As a first guess, the chunking size is jxl. If the super-rows/cols are
        larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed
        accordingly. The pathological limiting case is where the dimensions
        nao_pair, nij_pair, or nkl_pair are so large that the arrays are
        chunked 1x1, in which case IOBLK_SIZE needs to be increased.
    '''
    log = logger.new_logger(None, verbose)
    log.info('******** ao2mo disk, custom eri ********')

    eri_ao = numpy.asarray(eri, order='C')
    nao, nmoi = mo_coeffs[0].shape
    nmoj = mo_coeffs[1].shape[1]
    nao_pair = nao*(nao+1)//2
    ijmosym, nij_pair, moij, ijshape = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
    klmosym, nkl_pair, mokl, klshape = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
    # Convert the (start, stop) slice pairs into (start, count) pairs, which is
    # the form handed to the C driver below.
    ijshape = (ijshape[0], ijshape[1]-ijshape[0],
               ijshape[2], ijshape[3]-ijshape[2])
    dtype = numpy.result_type(eri, *mo_coeffs)
    typesize = dtype.itemsize/1e6  # in MB

    if nij_pair == 0:
        return numpy.empty((nij_pair,nkl_pair))

    ij_red = ijmosym == 's1'
    kl_red = klmosym == 's1'

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del(feri[dataname])
        else:
            feri = h5py.File(erifile,'w',libver='latest')
    else:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile
    h5d_eri = feri.create_dataset(dataname,(nij_pair,nkl_pair), dtype.char)
    feri_swap = lib.H5TmpFile(libver='latest')
    chunk_size = min(nao_pair, max(4, int(ioblk_size*1e6/8/nao_pair)))

    log.debug('Memory information:')
    log.debug(' IOBLK_SIZE (MB): {} chunk_size: {}'
              .format(ioblk_size, chunk_size))
    log.debug(' Final disk eri size (MB): {:.3g}'
              .format(nij_pair*nkl_pair*typesize))
    log.debug(' Half transformed eri size (MB): {:.3g}'
              .format(nij_pair*nao_pair*typesize))
    log.debug(' RAM buffer (MB): {:.3g}'
              .format(nij_pair*IOBLK_SIZE*typesize*2))

    if eri_ao.size == nao_pair**2:  # 4-fold symmetry
        # half_e1 first transforms the indices which are contiguous in memory
        # transpose the 4-fold integrals to make ij the contiguous indices
        eri_ao = lib.transpose(eri_ao)
        ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s4
    elif eri_ao.size == nao_pair*(nao_pair+1)//2:
        ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s8
    else:
        raise NotImplementedError

    if ijmosym == 's2':
        fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_s2
    elif nmoi <= nmoj:
        fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_iltj
    else:
        fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_igtj
    fdrv = getattr(_ao2mo.libao2mo, 'AO2MOnr_e1incore_drv')

    def save(piece, buf):
        feri_swap[str(piece)] = buf.T

    # transform \mu\nu -> ij
    # time.clock() no longer exists on Python >= 3.8; use the logger clocks.
    cput0 = logger.process_clock(), logger.perf_counter()
    with lib.call_in_background(save) as async_write:
        for istep, (p0, p1) in enumerate(lib.prange(0, nao_pair, chunk_size)):
            if dtype == numpy.double:
                buf = numpy.empty((p1-p0, nij_pair))
                fdrv(ftrans, fmmm,
                     buf.ctypes.data_as(ctypes.c_void_p),
                     eri_ao.ctypes.data_as(ctypes.c_void_p),
                     moij.ctypes.data_as(ctypes.c_void_p),
                     ctypes.c_int(p0), ctypes.c_int(p1-p0),
                     ctypes.c_int(nao),
                     ctypes.c_int(ijshape[0]), ctypes.c_int(ijshape[1]),
                     ctypes.c_int(ijshape[2]), ctypes.c_int(ijshape[3]))
            else:  # complex
                tmp = numpy.empty((p1-p0, nao_pair))
                if eri_ao.size == nao_pair**2:  # 4-fold symmetry
                    tmp = eri_ao[p0:p1]
                else:  # 8-fold symmetry
                    for i in range(p0, p1):
                        tmp[i-p0] = lib.unpack_row(eri_ao, i)
                tmp = lib.unpack_tril(tmp, filltriu=lib.SYMMETRIC)
                buf = lib.einsum('xpq,pi,qj->xij', tmp, mo_coeffs[0].conj(), mo_coeffs[1])
                if ij_red:
                    buf = buf.reshape(p1-p0,-1)  # grabs by row
                else:
                    buf = lib.pack_tril(buf)

            async_write(istep, buf)

    log.timer('(uv|lo) -> (ij|lo)', *cput0)

    # transform \lambda\sigma -> kl
    cput1 = logger.process_clock(), logger.perf_counter()
    Cklam = mo_coeffs[2].conj()
    buf_read = numpy.empty((chunk_size,nao_pair), dtype=dtype)
    buf_prefetch = numpy.empty_like(buf_read)

    def load(start, stop, buf):
        if start < stop:
            _load_from_h5g(feri_swap, start, stop, buf)

    def save(start, stop, buf):
        if start < stop:
            h5d_eri[start:stop] = buf[:stop-start]

    with lib.call_in_background(save,load) as (async_write, prefetch):
        for p0, p1 in lib.prange(0, nij_pair, chunk_size):
            if p0 == 0:
                # Nothing has been prefetched yet for the first block.
                load(p0, p1, buf_prefetch)

            # Swap buffers: consume what was prefetched, refill the other one.
            buf_read, buf_prefetch = buf_prefetch, buf_read
            prefetch(p1, min(p1+chunk_size, nij_pair), buf_prefetch)

            lo = lib.unpack_tril(buf_read[:p1-p0], filltriu=lib.SYMMETRIC)
            lo = lib.einsum('xpq,pi,qj->xij', lo, Cklam, mo_coeffs[3])
            if kl_red:
                kl = lo.reshape(p1-p0,-1)
            else:
                kl = lib.pack_tril(lo)
            async_write(p0, p1, kl)

    log.timer('(ij|lo) -> (ij|kl)', *cput1)

    if isinstance(erifile, str):
        feri.close()
    return erifile
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform density-fitted AO integrals to the MO basis for four k-points.

    Args:
        mydf : density-fitting object
            Must provide ``_cderi``, ``build()``, ``max_memory`` and
            ``sr_loop``; ``build()`` is called when ``_cderi`` is None.
        mo_coeffs : 2D ndarray or 4-item sequence of ndarray
            Orbital coefficients for the indices of (ij|kl).  A single 2D
            array is used for all four indices.

    Kwargs:
        kpts : passed to ``_format_kpts`` to obtain (kpti, kptj, kptk, kptl)
        compact : bool
            Forwarded to ``_conc_mos`` in the real gamma-point branch only.

    Returns:
        2D ndarray of MO integrals with rows indexed by the ij pair and
        columns by the kl pair.  The result is real in the gamma-point branch
        and complex128 otherwise.
    '''
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif (abs(kpti-kptk).sum() < KPT_DIFF_TOL) and (abs(kptj-kptl).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex was removed in NumPy 1.24; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The pair index was built as (l,k); swap the last two axes so the
        # returned columns are ordered as (k,l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        zij = zkl = None
        for (LpqR, LpqI), (LrsR, LrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def general(mol, mo_coeffs, erifile, dataname='eri_mo', tmpdir=None,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True):
    r'''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on the fly.

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By assigning
            different dataname, the existing integral file can be reused.  If
            the erifile contains the dataname, the new integrals data will
            overwrite the old one.
        tmpdir : str
            The directory where to temporarily store the intermediate data
            (the half-transformed integrals).  By default, it's controlled by
            shell environment variable ``TMPDIR``.  The disk space requirement
            is about  comp*mo_coeffs[0].shape[1]*mo_coeffs[1].shape[1]*nao**2
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals

    Returns:
        ``erifile`` as passed in; the integrals are written to ``dataname``.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf import ao2mo
    >>> import h5py
    >>> def view(h5file, dataname='eri_mo'):
    ...     f5 = h5py.File(h5file, 'r')
    ...     print('dataset %s, shape %s' % (str(f5.keys()), str(f5[dataname].shape)))
    ...     f5.close()
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> mo1 = numpy.random.random((mol.nao_nr(), 10))
    >>> mo2 = numpy.random.random((mol.nao_nr(), 8))
    >>> mo3 = numpy.random.random((mol.nao_nr(), 6))
    >>> mo4 = numpy.random.random((mol.nao_nr(), 4))
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo4), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 24)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 21)
    >>> ao2mo.outcore.general(mol, (mo1,mo2,mo3,mo3), 'oh2.h5', compact=False)
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (80, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo2,mo2), 'oh2.h5')
    >>> view('oh2.h5')
    dataset ['eri_mo'], shape (55, 36)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', dataname='new')
    >>> view('oh2.h5', 'new')
    dataset ['eri_mo', 'new'], shape (55, 55)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='cint2e_ip1_sph', aosym='s1', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 100)
    >>> ao2mo.outcore.general(mol, (mo1,mo1,mo1,mo1), 'oh2.h5', intor='cint2e_ip1_sph', aosym='s2kl', comp=3)
    >>> view('oh2.h5')
    dataset ['eri_mo', 'new'], shape (3, 100, 55)
    '''
    # time.clock() no longer exists on Python >= 3.8; use the logger clocks.
    time_0pass = (logger.process_clock(), logger.perf_counter())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2kl'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    if (compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]) and
            aosym in ('s4', 's2ij')):
        nij_pair = nmoi*(nmoi+1) // 2
    else:
        nij_pair = nmoi*nmoj

    klmosym, nkl_pair, mokl, klshape = \
        incore._conc_mos(mo_coeffs[2], mo_coeffs[3],
                         compact and aosym in ('s4', 's2kl'))

#    if nij_pair > nkl_pair:
#        log.warn('low efficiency for AO to MO trans!')

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            # Open read/write explicitly: without a mode h5py >= 3 opens the
            # file read-only and the dataset could not be replaced.
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del(feri[dataname])
        else:
            feri = h5py.File(erifile, 'w')
    else:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile
    if comp == 1:
        chunks = (nmoj,nmol)
        h5d_eri = feri.create_dataset(dataname, (nij_pair,nkl_pair),
                                      'f8', chunks=chunks)
    else:
        chunks = (1,nmoj,nmol)
        h5d_eri = feri.create_dataset(dataname, (comp,nij_pair,nkl_pair),
                                      'f8', chunks=chunks)

    if nij_pair == 0 or nkl_pair == 0:
        if isinstance(erifile, str):
            feri.close()
        return erifile
    log.debug('MO integrals %s are saved in %s/%s', intor, erifile, dataname)
    log.debug('num. MO ints = %.8g, required disk %.8g MB',
              float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*8/1e6)

    # transform e1
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    fswap = h5py.File(swapfile.name, 'w')
    half_e1(mol, mo_coeffs, fswap, intor, aosym, comp, max_memory, ioblk_size,
            log, compact)

    time_1pass = log.timer('AO->MO transformation for %s 1 pass'%intor,
                           *time_0pass)

    ioblk_size = max(max_memory*.2, ioblk_size)
    iobuflen = guess_e2bufsize(ioblk_size, nij_pair, max(nao_pair,nkl_pair))[0]
    # Two read buffers: one being consumed, one being filled in the background.
    reading_frame = [numpy.empty((iobuflen,nao_pair)),
                     numpy.empty((iobuflen,nao_pair))]

    def prefetch(icomp, row0, row1, buf):
        # Advance to the next (component, row-block) pair and load it.
        if icomp+1 < comp:
            icomp += 1
        else:
            row0, row1 = row1, min(nij_pair, row1+iobuflen)
            icomp = 0
        if row0 < row1:
            _load_from_h5g(fswap['%d'%icomp], row0, row1, buf)

    def async_read(icomp, row0, row1, thread_read):
        buf_current, buf_prefetch = reading_frame
        reading_frame[:] = [buf_prefetch, buf_current]
        if thread_read is None:
            _load_from_h5g(fswap['%d'%icomp], row0, row1, buf_current)
        else:
            thread_read.join()
        thread_read = lib.background_thread(prefetch, icomp, row0, row1, buf_prefetch)
        return buf_current[:row1-row0], thread_read

    def save(icomp, row0, row1, buf):
        if comp == 1:
            h5d_eri[row0:row1] = buf[:row1-row0]
        else:
            h5d_eri[icomp,row0:row1] = buf[:row1-row0]

    def async_write(icomp, row0, row1, buf, thread_io):
        if thread_io is not None:
            thread_io.join()
        thread_io = lib.background_thread(save, icomp, row0, row1, buf)
        return thread_io

    log.debug('step2: kl-pair (ao %d, mo %d), mem %.8g MB, ioblock %.8g MB',
              nao_pair, nkl_pair, iobuflen*nao_pair*8/1e6,
              iobuflen*nkl_pair*8/1e6)

    ijmoblks = int(numpy.ceil(float(nij_pair)/iobuflen)) * comp
    ao_loc = mol.ao_loc_nr('cart' in intor)
    ti0 = time_1pass
    bufs1 = numpy.empty((iobuflen,nkl_pair))
    buf_write = numpy.empty_like(bufs1)
    istep = 0
    read_handler = write_handler = None

    for row0, row1 in prange(0, nij_pair, iobuflen):
        nrow = row1 - row0

        for icomp in range(comp):
            istep += 1
            log.debug1('step 2 [%d/%d], [%d,%d:%d], row = %d',
                       istep, ijmoblks, icomp, row0, row1, nrow)

            buf, read_handler = async_read(icomp, row0, row1, read_handler)
            _ao2mo.nr_e2(buf, mokl, klshape, aosym, klmosym,
                         ao_loc=ao_loc, out=bufs1)
            write_handler = async_write(icomp, row0, row1, bufs1, write_handler)
            bufs1, buf_write = buf_write, bufs1  # avoid flushing writing buffer

            ti1 = (logger.process_clock(), logger.perf_counter())
            log.debug1('step 2 [%d/%d] CPU time: %9.2f, Wall time: %9.2f',
                       istep, ijmoblks, ti1[0]-ti0[0], ti1[1]-ti0[1])
            ti0 = ti1
    write_handler.join()
    fswap.close()
    if isinstance(erifile, str):
        feri.close()

    log.timer('AO->MO transformation for %s 2 pass'%intor, *time_1pass)
    log.timer('AO->MO transformation for %s '%intor, *time_0pass)
    return erifile
def get_sigmaR_diag(gw, omega, kn, orbp, ef, freqs, qij, q_abs):
    '''
    Compute self-energy for poles inside contour
    (more and more expensive away from Fermi surface)

    For every auxiliary k-point kL, the orbitals at the partner k-point of
    ``kn`` whose energies lie between ``ef`` and ``omega`` are collected, and
    each one adds a screened-interaction term to the returned complex sum.
    When ``gw.fc`` is set, the kL == 0 term for m == orbp additionally
    receives the head and wings corrections.
    '''
    mo_energy = np.array(gw._scf.mo_energy)
    mo_coeff = np.array(gw._scf.mo_coeff)
    nocc = gw.nocc
    nmo = gw.nmo
    nkpts = gw.nkpts
    kpts = gw.kpts
    nw = len(freqs)
    mydf = gw.with_df

    # possible kpts shift center
    kscaled = gw.mol.get_scaled_kpts(kpts)
    kscaled -= kscaled[0]

    # Orbitals between ef and omega at every k-point; fm carries the sign.
    idx = []
    for k in range(nkpts):
        ek = mo_energy[k]
        if omega > ef:
            fm = 1.0
            idx.append(np.where((ek < omega) & (ek > ef))[0])
        else:
            fm = -1.0
            idx.append(np.where((ek > omega) & (ek < ef))[0])

    nk = len(kpts)
    sigmaR = 0j
    for kL in range(nkpts):
        # All (ki, kj) pairs satisfying -ki+kj+kL=G, in ki-major order.
        pairs = []
        for i in range(nk):
            for j in range(nk):
                kconserv = -kscaled[i] + kscaled[j] + kscaled[kL]
                if np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12:
                    pairs.append((i, j))

        # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G)
        # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G)
        kidx = np.zeros((nkpts), dtype=np.int64)
        kidx_r = np.zeros((nkpts), dtype=np.int64)
        for i, j in pairs:
            kidx[i] = j
            kidx_r[j] = i

        km = kidx_r[kn]
        poles = idx[km]
        if len(poles) == 0:
            # No pole contributes for this kL.
            continue

        # Lij: (ki, L, i, j) for looping every kL
        Lij = []
        for i, j in pairs:
            # Read (L|pq) and ao2mo transform to (L|ij)
            Lpq = []
            for LpqR, LpqI, sign in mydf.sr_loop(
                    [kpts[i], kpts[j]],
                    max_memory=0.1 * gw._scf.max_memory, compact=False):
                Lpq.append(LpqR + LpqI * 1.0j)
            # support unequal naux on different k points
            Lpq = np.vstack(Lpq).reshape(-1, nmo**2)
            moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:]
            Lij_out = _ao2mo.r_e2(Lpq, moij, ijslice, [], None, out=None)
            Lij.append(Lij_out.reshape(-1, nmo, nmo))
        Lij = np.asarray(Lij)
        naux = Lij.shape[1]

        for m in poles:
            em = mo_energy[km][m] - omega

            # body dielectric matrix eps_body
            Pi = get_rho_response_R(gw, abs(em), mo_energy, Lij, kL, kidx)
            eps_body_inv = np.linalg.inv(np.eye(naux) - Pi)

            # Head and wings only enter at kL == 0 for the diagonal element.
            finite_size = kL == 0 and gw.fc and m == orbp
            if finite_size:
                qnorm = np.linalg.norm(q_abs[0])
                # head dielectric matrix eps_00
                Pi_00 = get_rho_response_head_R(gw, abs(em), mo_energy, qij)
                eps_00 = 1. - 4. * np.pi / qnorm**2 * Pi_00
                # wings dielectric matrix eps_P0
                Pi_P0 = get_rho_response_wing_R(gw, abs(em), mo_energy, Lij, qij)
                eps_P0 = -np.sqrt(4. * np.pi) / qnorm * Pi_P0
                # inverse dielectric matrix
                eps_inv_00 = 1. / (eps_00 - np.dot(
                    np.dot(eps_P0.conj(), eps_body_inv), eps_P0))
                eps_inv_P0 = -eps_inv_00 * np.dot(eps_body_inv, eps_P0)

            # body
            Lm = Lij[km][:, m, orbp]
            Qmn = einsum('P,PQ->Q', Lm.conj(), eps_body_inv - np.eye(naux))
            Wmn = 1. / nkpts * einsum('Q,Q->', Qmn, Lm)
            sigmaR += fm * Wmn

            if finite_size:
                vol_fac = 6. * np.pi**2 / gw.mol.vol / nkpts
                # head correction
                Del_00 = 2. / np.pi * vol_fac**(1. / 3.) * (eps_inv_00 - 1.)
                sigmaR += fm * Del_00

                # wings correction
                wings_const = np.sqrt(
                    gw.mol.vol / 4. / np.pi**3) * vol_fac**(2. / 3.)
                Wn_P0 = einsum('P,P->', Lij[kn][:, m, orbp].conj(), eps_inv_P0)
                Wn_P0 = Wn_P0.real * 2.
                sigmaR += fm * wings_const * Wn_P0

    return sigmaR
def half_e1(mol, mo_coeffs, swapfile,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : sequence of ndarray
            Only ``mo_coeffs[0]`` and ``mo_coeffs[1]`` are used here; they
            transform the first index pair.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The transformed
            integrals are saved in blocks.

    Kwargs
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve performance

    Returns:
        ``swapfile`` as passed in; the half-transformed integrals are written
        to groups named by the component index.
    '''
    # time.clock() no longer exists on Python >= 3.8; use the logger clocks.
    time0 = (logger.process_clock(), logger.perf_counter())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = \
        incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                         compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = \
        guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    ioblk_size = ioblk_words * 8/1e6
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = max(int((mem_words - 2*comp*e1buflen*nij_pair) // (nao_pair*comp)),
                   IOBUF_ROW_MIN)
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')), e1buflen, aobuflen)
    if ao2mopt is None:
        if intor == 'cint2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    if isinstance(swapfile, str):
        fswap = h5py.File(swapfile, 'w')
    else:
        fswap = swapfile
    for icomp in range(comp):
        fswap.create_group(str(icomp))  # for h5py old version

    log.debug('step1: tmpfile %s %.8g MB', fswap.filename, nij_pair*nao_pair*8/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)
    nstep = len(shranges)
    e1buflen = max([x[2] for x in shranges])
    e2buflen, chunks = guess_e2bufsize(ioblk_size, nij_pair, e1buflen)

    def save(istep, iobuf):
        for icomp in range(comp):
            _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                              e2buflen, None)

    def async_write(istep, iobuf, thread_io):
        # Wait for the previous write before starting the next one.
        if thread_io is not None:
            thread_io.join()
        thread_io = lib.background_thread(save, istep, iobuf)
        return thread_io

    # transform e1
    ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
    bufs1 = numpy.empty((comp*e1buflen,nao_pair))
    bufs2 = numpy.empty((comp*e1buflen,nij_pair))
    buf_write = numpy.empty_like(bufs2)
    write_handler = None
    for istep,sh_range in enumerate(shranges):
        log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                   istep+1, nstep, *(sh_range[:3]))
        buflen = sh_range[2]
        iobuf = numpy.ndarray((comp,buflen,nij_pair), buffer=bufs2)
        nmic = len(sh_range[3])
        p0 = 0
        for imic, aoshs in enumerate(sh_range[3]):
            log.debug2(' fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                       imic+1, nmic, *aoshs)
            buf = numpy.ndarray((comp*aoshs[2],nao_pair), buffer=bufs1)  # (@)
            _ao2mo.nr_e1fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                             aosym, comp, ao2mopt, out=buf)
            buf = _ao2mo.nr_e1(buf, moij, ijshape, aosym, ijmosym)
            iobuf[:,p0:p0+aoshs[2]] = buf.reshape(comp,aoshs[2],-1)
            p0 += aoshs[2]
        ti0 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

        write_handler = async_write(istep, iobuf, write_handler)
        bufs2, buf_write = buf_write, bufs2  # avoid flushing writing buffer

    write_handler.join()
    bufs1 = bufs2 = None
    if isinstance(swapfile, str):
        fswap.close()
    return swapfile
def grad_elec_auxresponse_dferi (mc_grad, mo_cas=None, ci=None, dfcasdm2=None, casdm2=None, atmlst=None, max_memory=None, dferi=None, incl_2c=True):
    ''' Evaluate the [(P'|ij) + (P'|Q) g_Qij] d_Pij contribution to the electronic gradient, where d_Pij is the
    DF-2RDM obtained by solve_df_rdm2 and g_Qij solves (P|Q) g_Qij = (P|ij). The caller must symmetrize if necessary
    (i.e., (P|Q) d_Qij = (P|kl) d_ijkl <-> (P|Q) d_Qkl = (P|ij) d_ijkl in order to get at Q').

    Args:
        mc_grad: MC-SCF gradients method object

    Kwargs:
        mo_cas: ndarray, list, or tuple containing active-space MO coefficients
            If a tuple of length 2, the same pair of MO sets are assumed to apply to the internally-contracted and
            externally-contracted indices of the DF-2rdm: (P|Q)d_Qij = (P|kl)d_ijkl -> (P|Q)d_Qij = (P|ij)d_ijij
            If a tuple of length 4, the 4 MO sets are applied to ijkl above in that order
            (first two external, last two internal).
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if dfcasdm2 is provided.
        dfcasdm2: ndarray, tuple, or list containing DF-2rdm in mo_cas basis.
            Computed by solve_df_rdm2 if omitted.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Computed by mc_grad.fcisolver.make_rdm12 (ci,...) if omitted.
        atmlst: list of integers
            List of nonfrozen atoms, as in grad_elec functions.
            Defaults to list (range (mol.natm))
        max_memory: int
            Maximum memory usage in MB
        dferi: ndarray containing g_Pij for optional precalculation
        incl_2c: bool
            If False, omit the terms depending on (P'|Q)

    Returns:
        dE: list of ndarray of shape (len (atmlst), 3)

    Raises:
        RuntimeError: mo_cas is neither a 2D ndarray nor a sequence of length 2 or 4
    '''
    if isinstance (mc_grad, GradientsBasics):
        mc = mc_grad.base
    else:
        mc = mc_grad
    mol = mc_grad.mol
    auxmol = mc.with_df.auxmol
    ncore, ncas, nao, naux, nbas = mc.ncore, mc.ncas, mol.nao, auxmol.nao, mol.nbas
    nocc = ncore + ncas
    npair = nao * (nao + 1) // 2
    if mo_cas is None: mo_cas = mc.mo_coeff[:,ncore:nocc]
    if max_memory is None: max_memory = mc.max_memory
    if isinstance (mo_cas, np.ndarray) and mo_cas.ndim == 2:
        mo_cas = (mo_cas,)*4
    elif len (mo_cas) == 2:
        mo_cas = (mo_cas[0], mo_cas[1], mo_cas[0], mo_cas[1])
    elif len (mo_cas) == 4:
        mo_cas = tuple (mo_cas)
    else:
        # mo_cas is usually a plain list/tuple here and has no .shape attribute;
        # fall back to its length so the intended RuntimeError is what gets raised.
        bad_shape = getattr (mo_cas, 'shape', None)
        if bad_shape is None:
            bad_shape = (len (mo_cas),)
        raise RuntimeError ('Invalid shape of np.asarray (mo_cas): {}'.format (bad_shape))
    nmo = [mo.shape[1] for mo in mo_cas]
    if atmlst is None: atmlst = list (range (mol.natm))
    if ci is None: ci = mc.ci
    if dfcasdm2 is None: dfcasdm2 = solve_df_rdm2 (mc, mo_cas=mo_cas[2:], ci=ci, casdm2=casdm2) # d_Pij = (P|Q)^{-1} (Q|kl) d_ijkl
    nset = len (dfcasdm2)
    dE = np.zeros ((nset, naux, 3))
    dfcasdm2 = np.array (dfcasdm2)

    # Shape dfcasdm2
    mosym, nmo_pair, mo_conc, mo_slice = _conc_mos(mo_cas[0], mo_cas[1], compact=True)
    if 's2' in mosym:
        assert (nmo[0] == nmo[1]), 'How did I get {} with nmo[0] = {} and nmo[1] = {}'.format (mosym, nmo[0], nmo[1])
        # Symmetrize, pack the lower triangle, and halve the diagonal, which
        # the symmetrization counted twice.
        dfcasdm2 = dfcasdm2.reshape (nset*naux, nmo[0], nmo[1])
        dfcasdm2 += dfcasdm2.transpose (0,2,1)
        diag_idx = np.arange(nmo[0])
        diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
        dfcasdm2 = lib.pack_tril (np.ascontiguousarray (dfcasdm2))
        dfcasdm2[:,diag_idx] *= 0.5
    dfcasdm2 = dfcasdm2.reshape (nset, naux, nmo_pair)

    # Do 2c part. Assume memory is no object
    if incl_2c:
        int2c = auxmol.intor('int2c2e_ip1')
        if (dferi is None): dferi = solve_df_eri (mc, mo_cas=mo_cas[:2]).reshape (naux, nmo_pair) # g_Pij = (P|Q)^{-1} (Q|ij)
        int3c = np.dot (int2c, dferi) # (P'|Q) g_Qij
        dE += lib.einsum ('npi,xpi->npx', dfcasdm2, int3c) # d_Pij (P'|Q) g_Qij
        int2c = int3c = dferi = None

    # Set up 3c part
    get_int3c = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')
    max_memory -= lib.current_memory()[0]
    blklen = 6*npair
    blksize = int (min (max (max_memory * 1e6 / 8 / blklen, 20), 240))
    aux_loc = auxmol.ao_loc
    aux_ranges = balance_partition(aux_loc, blksize)

    # Iterate over auxbasis range and do 3c part
    for shl0, shl1, nL in aux_ranges:
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        int3c = get_int3c ((0, nbas, 0, nbas, shl0, shl1))  # (uv|P'); shape = (3,npair,p1-p0)
        int3c = np.ascontiguousarray (int3c.transpose (0,2,1).reshape (3*(p1-p0), npair))
        int3c = _ao2mo.nr_e2(int3c, mo_conc, mo_slice, aosym='s2', mosym=mosym)
        int3c = int3c.reshape (3,p1-p0,nmo_pair)
        int3c = np.ascontiguousarray (int3c)
        dE[:,p0:p1,:] -= lib.einsum ('npi,xpi->npx', dfcasdm2[:,p0:p1,:], int3c)

    # Ravel to atoms
    auxslices = auxmol.aoslice_by_atom ()
    dE = np.array ([dE[:,p0:p1].sum (axis=1) for p0, p1 in auxslices[:,2:]]).transpose (1,0,2)
    return np.ascontiguousarray (dE)
def get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=None, max_memory=8000):
    '''
    Compute GW correlation self-energy (diagonal elements)
    in MO basis on imaginary axis

    Args:
        gw: GW object. This routine reads gw._scf (mo_energy, mo_coeff,
            max_memory), gw.nocc, gw.nmo, gw.nkpts, gw.kpts, gw.with_df,
            gw.mol and gw.fc.
        orbs: sequence of orbital indices whose diagonal self-energy is
            wanted. NOTE: the occupied/virtual split below slices the first
            len([i in orbs if i < nocc]) entries as "occupied", i.e. it
            assumes orbs is ordered with occupied orbitals first.
        kptlist: sequence of k-point indices (into gw.kpts) to evaluate.
        freqs: imaginary-frequency quadrature points.
        wts: quadrature weights matching freqs.

    Kwargs:
        iw_cutoff: if given, only frequencies below this value are kept
            in the output grid.
        max_memory: not used by this routine (kept for interface
            compatibility).

    Returns:
        sigma: complex ndarray of shape (2, nklist, norbs, nw_sigma)
        omega: complex ndarray of shape (2, norbs, nw_sigma), the frequency
            grid per spin and orbital (-i*w for occupied, +i*w for virtual,
            with a leading zero)
    '''
    mo_energy = np.array(gw._scf.mo_energy)
    mo_coeff = np.array(gw._scf.mo_coeff)
    nocca, noccb = gw.nocc
    nmoa, nmob = gw.nmo
    nkpts = gw.nkpts
    kpts = gw.kpts
    nklist = len(kptlist)
    nw = len(freqs)
    norbs = len(orbs)
    mydf = gw.with_df

    # possible kpts shift
    kscaled = gw.mol.get_scaled_kpts(kpts)
    kscaled -= kscaled[0]

    # This code does not support metals
    homo = -99.
    lumo = 99.
    for k in range(nkpts):
        homo_k = max(mo_energy[0, k][nocca - 1], mo_energy[1, k][noccb - 1])
        lumo_k = min(mo_energy[0, k][nocca], mo_energy[1, k][noccb])
        if homo < homo_k:
            homo = homo_k
        if lumo > lumo_k:
            lumo = lumo_k
    if (lumo - homo) < 1e-3:
        logger.warn(gw, 'Current KUGW is not supporting metals!')
    # Mid-gap energy used as the reference for the shifted MO energies below
    ef = (homo + lumo) / 2.

    # Integration on numerical grids
    if iw_cutoff is not None:
        nw_sigma = sum(iw < iw_cutoff for iw in freqs) + 1
    else:
        nw_sigma = nw + 1

    # Compute occ for -iw and vir for iw separately
    # to avoid branch cuts in analytic continuation
    omega_occ = np.zeros((nw_sigma), dtype=np.complex128)
    omega_vir = np.zeros((nw_sigma), dtype=np.complex128)
    omega_occ[0] = 1j * 0.
    omega_occ[1:] = -1j * freqs[:(nw_sigma - 1)]
    omega_vir[0] = 1j * 0.
    omega_vir[1:] = 1j * freqs[:(nw_sigma - 1)]

    # Number of requested orbitals that are occupied, per spin
    norbs_occ = (len([i for i in orbs if i < nocca]),
                 len([i for i in orbs if i < noccb]))

    # emo_*[s][k, m, w] = omega[w] + ef - e_m(k) for spin s
    emo_occ = []
    emo_vir = []
    for s, nmo_s in enumerate((nmoa, nmob)):
        occ = np.zeros((nkpts, nmo_s, nw_sigma), dtype=np.complex128)
        vir = np.zeros((nkpts, nmo_s, nw_sigma), dtype=np.complex128)
        for k in range(nkpts):
            occ[k] = omega_occ[None, :] + ef - mo_energy[s, k][:, None]
            vir[k] = omega_vir[None, :] + ef - mo_energy[s, k][:, None]
        emo_occ.append(occ)
        emo_vir.append(vir)

    sigma = np.zeros((2, nklist, norbs, nw_sigma), dtype=np.complex128)
    omega = np.zeros((2, norbs, nw_sigma), dtype=np.complex128)
    for s in range(2):
        for p, orbp in enumerate(orbs):
            omega[s, p] = omega_occ if orbp < gw.nocc[s] else omega_vir

    if gw.fc:
        # Set up q mesh for q->0 finite size correction
        q_pts = np.array([1e-3, 0, 0]).reshape(1, 3)
        q_abs = gw.mol.get_abs_kpts(q_pts)
        q_norm = np.linalg.norm(q_abs[0])

        # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir)
        qij = get_qij(gw, q_abs[0], mo_coeff)

        # Loop-invariant prefactors of the head and wing corrections
        head_pref = 2. / np.pi * (6. * np.pi**2 / gw.mol.vol / nkpts)**(1. / 3.)
        wing_pref = np.sqrt(gw.mol.vol / 4. / np.pi**3) * (
            6. * np.pi**2 / gw.mol.vol / nkpts)**(2. / 3.)

    for kL in range(nkpts):
        # Lij: (2, ki, L, i, j) for looping every kL
        Lij = []
        # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G)
        # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G)
        kidx = np.zeros((nkpts), dtype=np.int64)
        kidx_r = np.zeros((nkpts), dtype=np.int64)
        for i, kpti in enumerate(kpts):
            for j, kptj in enumerate(kpts):
                # Find (ki,kj) that satisfies momentum conservation with kL
                kconserv = -kscaled[i] + kscaled[j] + kscaled[kL]
                is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12
                if not is_kconserv:
                    continue
                kidx[i] = j
                kidx_r[j] = i
                logger.debug(
                    gw, "Read Lpq (kL: %s / %s, ki: %s, kj: %s)" %
                    (kL + 1, nkpts, i, j))
                # Read (L|pq) and ao2mo transform to (L|ij)
                Lpq = []
                for LpqR, LpqI, sign in mydf.sr_loop(
                        [kpti, kptj],
                        max_memory=0.1 * gw._scf.max_memory,
                        compact=False):
                    Lpq.append(LpqR + LpqI * 1.0j)
                Lpq = np.vstack(Lpq).reshape(-1, nmoa**2)
                moija, ijslicea = _conc_mos(mo_coeff[0, i], mo_coeff[0, j])[2:]
                moijb, ijsliceb = _conc_mos(mo_coeff[1, i], mo_coeff[1, j])[2:]
                tao = []
                ao_loc = None
                Lij_out_a = _ao2mo.r_e2(Lpq, moija, ijslicea, tao, ao_loc,
                                        out=None)
                tao = []
                ao_loc = None
                Lij_out_b = _ao2mo.r_e2(Lpq, moijb, ijsliceb, tao, ao_loc,
                                        out=None)
                Lij.append(
                    np.asarray((Lij_out_a.reshape(-1, nmoa, nmoa),
                                Lij_out_b.reshape(-1, nmob, nmob))))
        Lij = np.asarray(Lij)
        Lij = Lij.transpose(1, 0, 2, 3, 4)
        naux = Lij.shape[2]
        eye = np.eye(naux)

        # The q->0 head/wing corrections only enter for kL == 0
        apply_fc = gw.fc and kL == 0

        for w in range(nw):
            # body dielectric matrix eps_body
            Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx)
            eps_body_inv = np.linalg.inv(eye - Pi)

            if apply_fc:
                # head dielectric matrix eps_00
                Pi_00 = get_rho_response_head(gw, freqs[w], mo_energy, qij)
                eps_00 = 1. - 4. * np.pi / q_norm**2 * Pi_00

                # wings dielectric matrix eps_P0
                Pi_P0 = get_rho_response_wing(gw, freqs[w], mo_energy, Lij, qij)
                eps_P0 = -np.sqrt(4. * np.pi) / q_norm * Pi_P0

                # inverse dielectric matrix
                eps_inv_00 = 1. / (eps_00 - np.dot(
                    np.dot(eps_P0.conj(), eps_body_inv), eps_P0))
                eps_inv_P0 = -eps_inv_00 * np.dot(eps_body_inv, eps_P0)

                # head correction
                Del_00 = head_pref * (eps_inv_00 - 1.)

            # (1 - Pi)^{-1} - 1
            Pi_inv = eps_body_inv - eye

            g0_occ = [wts[w] * e / (e**2 + freqs[w]**2) for e in emo_occ]
            g0_vir = [wts[w] * e / (e**2 + freqs[w]**2) for e in emo_vir]

            for k in range(nklist):
                kn = kptlist[k]
                # Find km that conserves with kn and kL (-km+kn+kL=G)
                km = kidx_r[kn]
                for s in range(2):
                    n_occ = norbs_occ[s]
                    L_orbs = Lij[s, km][:, :, orbs]
                    Qmn = einsum('Pmn,PQ->Qmn', L_orbs.conj(), Pi_inv)
                    Wmn = 1. / nkpts * einsum('Qmn,Qmn->mn', Qmn, L_orbs)
                    sigma[s, k][:n_occ] += -einsum(
                        'mn,mw->nw', Wmn[:, :n_occ], g0_occ[s][km]) / np.pi
                    sigma[s, k][n_occ:] += -einsum(
                        'mn,mw->nw', Wmn[:, n_occ:], g0_vir[s][km]) / np.pi

                    if apply_fc:
                        assert (kn == km)
                        g0_occ_n = g0_occ[s][kn][orbs]
                        g0_vir_n = g0_vir[s][kn][orbs]

                        # apply head correction
                        sigma[s, k][:n_occ] += -Del_00 * g0_occ_n[:n_occ] / np.pi
                        sigma[s, k][n_occ:] += -Del_00 * g0_vir_n[n_occ:] / np.pi

                        # apply wing correction
                        Wn_P0 = einsum('Pnm,P->nm', Lij[s, kn],
                                       eps_inv_P0).diagonal()
                        Wn_P0 = Wn_P0.real * 2.
                        Del_P0 = wing_pref * Wn_P0[orbs]
                        sigma[s, k][:n_occ] += -einsum(
                            'n,nw->nw', Del_P0[:n_occ],
                            g0_occ_n[:n_occ]) / np.pi
                        sigma[s, k][n_occ:] += -einsum(
                            'n,nw->nw', Del_P0[n_occ:],
                            g0_vir_n[n_occ:]) / np.pi

    return sigma, omega
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Accumulate the density-fitting contributions to the MO-basis ERIs.

    The result starts from ``pwdf_ao2mo.general(mydf, mo_coeffs, kptijkl,
    compact)`` and the products of the tensors yielded by ``mydf.sr_loop``
    (transformed with the concatenated MO coefficients) are added on top.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called if
            ``mydf._cderi`` is None.
        mo_coeffs: a single 2D ndarray (used for all four indices) or a
            sequence of four MO coefficient arrays.

    Kwargs:
        kpts: k-points, normalised through ``_format_kpts`` into the four
            k-points (i, j, k, l).
        compact: forwarded to ``pwdf_ao2mo.general`` and, in the real
            gamma-point branch, to ``_conc_mos``.

    Returns:
        eri_mo: 2D array of MO integrals (pair index ij by pair index kl).
    '''
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    eri_mo = pwdf_ao2mo.general(mydf, mo_coeffs, kptijkl, compact)

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        if sym:
            eri_mo *= .5  # because we'll do +cc later

        ijR = klR = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               j3cR, klR, klmosym, mokl, klslice, False)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            if not sym:
                # Without ij<->kl symmetry the swapped product must be
                # accumulated explicitly instead of via transpose_sum below.
                ijR, klR = _dtrans(j3cR, ijR, ijmosym, moij, ijslice,
                                   LpqR, klR, klmosym, mokl, klslice, False)
                lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            # Drop references so the buffers can be freed before the next block
            LpqR = LpqI = j3cR = j3cI = None
        if sym:
            eri_mo = lib.transpose_sum(eri_mo, inplace=True)
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex (alias of the builtin) was removed in NumPy 1.24;
        # complex128 is the same dtype and works on every NumPy version.
        eri_lk = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            bufL = LpqR+LpqI*1j
            bufj = j3cR+j3cI*1j
            zij, zlk = _ztrans(bufL, zij, moij, ijslice,
                               bufj, zlk, molk, lkslice, False)
            lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            if not sym:
                zij, zlk = _ztrans(bufj, zij, moij, ijslice,
                                   bufL, zlk, molk, lkslice, False)
                lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            LpqR = LpqI = j3cR = j3cI = bufL = bufj = None
        if sym:
            eri_lk += lib.transpose(eri_lk).conj()

        # eri_lk was built with the last pair ordered (l,k); swap to (k,l)
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_lk = lib.transpose(eri_lk.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        eri_mo += eri_lk.reshape(nij_pair, nlk_pair)
        return eri_mo

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # Two sr_loop generators run in lockstep, so each gets half the budget
        max_memory *= .5

        zij = zkl = None
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               jrsR+jrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            zij, zkl = _ztrans(jpqR+jpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None
        return eri_mo