def build(self, j_only=None, with_j3c=True, kpts_band=None):
    '''Prepare the auxiliary cell and the k-point pair list and, optionally,
    generate the 3-index density-fitting integrals.

    Args:
        j_only: if true, only the diagonal pairs (k, k) are put in the pair
            list.  ``None`` means "use ``self._j_only``".
        with_j3c: if true, ``self._make_j3c`` is called and ``self._cderi``
            is set to the name of the file receiving the integrals.
        kpts_band: extra k-points (anything reshapable to ``(-1, 3)``).
            They are merged into ``self.kpts_band``.

    Returns:
        self
    '''
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    # Work with the distinct k-points only.
    uniq_idx = unique(self.kpts)[1]
    kpts = numpy.asarray(self.kpts)[uniq_idx]
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0, 3))
    else:
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts)) == 0]
        # When every band k-point is already part of kpts the list is empty.
        # Force the (n, 3) layout so that the vstack below accepts it.
        kband_uniq = numpy.reshape(kband_uniq, (-1, 3))

    if j_only is None:
        j_only = self._j_only
    if j_only:
        kall = numpy.vstack([kpts, kband_uniq])
        kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
    else:
        # Lower triangle of kpts x kpts, then band-kpts x kpts, then the
        # diagonal band pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            # Otherwise a file-like object carrying a .name attribute
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .', cderi)
        self._cderi = cderi

        # time.clock() no longer exists on Python >= 3.8.  Use the clock
        # helpers of the logger module when it provides them and fall back
        # to the legacy pair only on installations that lack them.
        cpu_clock = getattr(logger, 'process_clock', None) or time.clock
        wall_clock = getattr(logger, 'perf_counter', None) or time.time
        t1 = (cpu_clock(), wall_clock())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        t1 = logger.timer_debug1(self, 'j3c', *t1)
    return self
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    '''Prepare the auxiliary cell and the k-point pair list and, optionally,
    generate the 3-index density-fitting integrals.

    Args:
        j_only: if true, only the diagonal pairs (k, k) are put in the pair
            list.  ``None`` means "use ``self._j_only``".
        with_j3c: if true, ``self._make_j3c`` is called and ``self._cderi``
            is set to the name of the file receiving the integrals.
        kpts_band: extra k-points (anything reshapable to ``(-1, 3)``).
            They are merged into ``self.kpts_band``.

    Returns:
        self
    '''
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1,3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1,3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(numpy.vstack((self.kpts_band,kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    # Remove duplicated k-points. Duplicated kpts may lead to a buffer
    # located in incore.wrap_int3c larger than necessary. Integral code
    # only fills necessary part of the buffer, leaving some space in the
    # buffer unfilled.
    uniq_idx = unique(self.kpts)[1]
    kpts = numpy.asarray(self.kpts)[uniq_idx]
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0,3))
    else:
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
        # When every band k-point is already part of kpts the list is empty.
        # Force the (n, 3) layout so that the vstack below accepts it.
        kband_uniq = numpy.reshape(kband_uniq, (-1,3))

    if j_only is None:
        j_only = self._j_only
    if j_only:
        kall = numpy.vstack([kpts,kband_uniq])
        kptij_lst = numpy.hstack((kall,kall)).reshape(-1,2,3)
    else:
        # Lower triangle of kpts x kpts, then band-kpts x kpts, then the
        # diagonal band pairs.
        kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts) for j in range(i+1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            # Otherwise a file-like object carrying a .name attribute
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .', cderi)
        self._cderi = cderi
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        t1 = logger.timer_debug1(self, 'j3c', *t1)
    return self
def _kpts_build(self, kpts_band=None):
    '''Normalise ``self.kpts_band`` to an (n, 3) array and merge the
    optional ``kpts_band`` argument into it.

    If both the attribute and the argument are given, the two sets are
    stacked and passed through ``unique``; its first return value becomes
    the new ``self.kpts_band``.
    '''
    if self.kpts_band is not None:
        self.kpts_band = np.reshape(self.kpts_band, (-1, 3))

    # Nothing to merge
    if kpts_band is None:
        return None

    extra = np.reshape(kpts_band, (-1, 3))
    existing = self.kpts_band
    if existing is None:
        self.kpts_band = extra
        return None

    stacked = np.vstack((existing, extra))
    self.kpts_band = unique(stacked)[0]
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    '''Prepare the auxiliary cell and the k-point pair list and, optionally,
    generate the 3-index density-fitting integrals.

    Args:
        j_only: if true, only the diagonal pairs (k, k) are put in the pair
            list.  ``None`` means "use ``self._j_only``".
        with_j3c: if true, ``self._make_j3c`` is called and ``self._cderi``
            is set to the name of the file receiving the integrals.
        kpts_band: extra k-points (anything reshapable to ``(-1, 3)``).
            They are merged into ``self.kpts_band``.

    Returns:
        self
    '''
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1,3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1,3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(numpy.vstack((self.kpts_band,kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    kpts = self.kpts
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0,3))
    else:
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
        # When every band k-point is already part of kpts the list is empty.
        # Force the (n, 3) layout so that the vstack below accepts it.
        kband_uniq = numpy.reshape(kband_uniq, (-1,3))

    if j_only is None:
        j_only = self._j_only
    if j_only:
        kall = numpy.vstack([kpts,kband_uniq])
        kptij_lst = numpy.hstack((kall,kall)).reshape(-1,2,3)
    else:
        # Lower triangle of kpts x kpts, then band-kpts x kpts, then the
        # diagonal band pairs.
        kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts) for j in range(i+1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            # Otherwise a file-like object carrying a .name attribute
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .', cderi)
        self._cderi = cderi

        # time.clock() no longer exists on Python >= 3.8.  Use the clock
        # helpers of the logger module when it provides them and fall back
        # to the legacy pair only on installations that lack them.
        cpu_clock = getattr(logger, 'process_clock', None) or time.clock
        wall_clock = getattr(logger, 'perf_counter', None) or time.time
        t1 = (cpu_clock(), wall_clock())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        t1 = logger.timer_debug1(self, 'j3c', *t1)
    return self
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    '''Prepare the auxiliary cell and the k-point pair list, then build the
    3-index integrals through ``self._make_j3c``.

    Args:
        j_only: if true, only the diagonal pairs (k, k) are put in the pair
            list.  ``None`` means "use ``self._j_only``".
        with_j3c: accepted for signature compatibility; this implementation
            does not consult it and always calls ``self._make_j3c``.
        kpts_band: extra k-points (anything reshapable to ``(-1, 3)``).
            They are merged into ``self.kpts_band``.

    Returns:
        self
    '''
    log = Logger(self.stdout, self.verbose)
    if self.kpts_band is not None:
        self.kpts_band = np.reshape(self.kpts_band, (-1,3))
    if kpts_band is not None:
        kpts_band = np.reshape(kpts_band, (-1,3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(np.vstack((self.kpts_band,kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = df.df.make_modrho_basis(self.cell, self.auxbasis,
                                           self.exp_to_discard)

    kpts = self.kpts
    if self.kpts_band is None:
        kband_uniq = np.zeros((0,3))
    else:
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
        # When every band k-point is already part of kpts the list is empty.
        # Force the (n, 3) layout so that the vstack below accepts it.
        kband_uniq = np.reshape(kband_uniq, (-1,3))

    if j_only is None:
        j_only = self._j_only
    if j_only:
        kall = np.vstack([kpts,kband_uniq])
        kptij_lst = np.hstack((kall,kall)).reshape(-1,2,3)
    else:
        # Lower triangle of kpts x kpts, then band-kpts x kpts, then the
        # diagonal band pairs.
        kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts) for j in range(i+1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = np.asarray(kptij_lst)

    # time.clock() was removed in Python 3.8.  Keep the legacy clock pair
    # where it still exists and use process_time/perf_counter elsewhere.
    # NOTE(review): assumes Logger.timer measures against the same clocks on
    # the interpreter in use - confirm.
    if hasattr(time, 'clock'):
        t1 = (time.clock(), time.time())
    else:
        t1 = (time.process_time(), time.perf_counter())
    self._make_j3c(self.cell, self.auxcell, kptij_lst)
    t1 = log.timer('j3c', *t1)
    return self
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Build the MO-basis two-electron integrals for all k-point triples.

    The AOs are evaluated on the uniform grid of ``mydf.mesh``.  For every
    distinct momentum transfer q = kj - ki, the (k,l) pair densities are
    passed through ``tools.ifft``, multiplied by the Coulomb kernel, passed
    through ``tools.fft`` and stored in a temporary HDF5 file.  They are
    then contracted with the (i,j) pair densities.

    Args:
        mydf: object providing ``cell``, ``kpts``, ``mesh`` and ``_numint``.
        mo_coeff_kpts: either one 3-dimensional ndarray, used for all four
            indices, or a sequence of four such coefficient sets.  ``shape[2]``
            of each set is taken as the number of orbitals.
        kpts: k-points; defaults to ``mydf.kpts``.
        factor: scalar multiplied into the Coulomb kernel.
        out: optional output array of shape
            ``(nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)``.

    Returns:
        ``out``, filled at ``out[ki, kj, kk]`` with the block whose fourth
        k-point is ``kconserv[ki, kj, kk]``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    if isinstance(mo_coeff_kpts, numpy.ndarray) and mo_coeff_kpts.ndim == 3:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # MO values on the grid.  Coefficient sets that are the very same object
    # share one transformed copy.
    mo_ids = [id(x) for x in mo_coeff_kpts]
    moTs = []
    coords = cell.gen_uniform_grids(mydf.mesh)
    aos = mydf._numint.eval_ao(cell, coords, kpts)
    for n, mo_id in enumerate(mo_ids):
        if mo_id in mo_ids[:n]:
            moTs.append(moTs[mo_ids[:n].index(mo_id)])
        else:
            moTs.append([lib.dot(mo.T, aos[k].T)
                         for k, mo in enumerate(mo_coeff_kpts[n])])

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert(out.shape == eri_shape)

    kptij_lst = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kptis_lst = kptij_lst[:,0]
    kptjs_lst = kptij_lst[:,1]
    kpt_ji = kptjs_lst - kptis_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    ngrids = numpy.prod(mydf.mesh)

    # To hold intermediates
    fswap = lib.H5TmpFile()
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id, q in enumerate(uniq_kpts):
        # Flat indices ki*nkpts + kj of all pairs with kj - ki == q
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_id)[0]
        ki = adapted_ji_idx[0] // nkpts
        kj = adapted_ji_idx[0] % nkpts

        coulG = tools.get_coulG(cell, q, mesh=mydf.mesh)
        coulG *= (cell.vol/ngrids) * factor
        phase = numpy.exp(-1j * numpy.dot(coords, q))

        # The first pair of the group is used to look up kl for every kk.
        for kk in range(nkpts):
            kl = kconserv[ki, kj, kk]
            mokT = moTs[2][kk]
            molT = moTs[3][kl]
            mo_pairs = numpy.einsum('ig,g,jg->ijg', mokT.conj(), phase.conj(), molT)
            v = tools.ifft(mo_pairs.reshape(-1,ngrids), mydf.mesh)
            v *= coulG
            v = tools.fft(v.reshape(-1,ngrids), mydf.mesh)
            v *= phase
            fswap['zkl/'+str(kk)] = v

        for ji_idx in adapted_ji_idx:
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts
            # The (i,j) pair density does not depend on kk: build it once
            # per pair instead of once per (pair, kk).
            ij_pairs = numpy.einsum('ig,jg->ijg', moTs[0][ki].conj(),
                                    moTs[1][kj]).reshape(-1,ngrids)
            for kk in range(nkpts):
                tmp = lib.dot(ij_pairs,
                              numpy.asarray(fswap['zkl/'+str(kk)]).T)
                if dtype == numpy.double:
                    tmp = tmp.real
                out[ki,kj,kk] = tmp.reshape(eri_shape[3:])
        # Drop the group so the same dataset names can be reused for the
        # next q.
        del(fswap['zkl'])

    return out
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    '''Build the fitted 3-index tensor for all k-point pairs and store it
    as ``mydf.j3c`` (a ctf tensor of shape ``(npairs, nao, nao, naux)``).

    Steps, in order:
      1. evaluate the 'int3c2e' integrals over the fused auxiliary cell and
         write them into the ctf tensor ``j3c_junk``;
      2. build the 2-centre metric ``j2c`` for every distinct ``kptj - kpti``;
      3. for every pair, decompose the metric, subtract the plane-wave part
         from the 3-centre integrals and apply the decomposed metric.

    Work is split with ``static_partition``.  Every rank runs
    ``max(comm.allgather(len(tasks)))`` iterations and issues empty
    read/write calls once it has run out of tasks (presumably because the
    tensor read/write operations are collective - confirm).  The order of
    these calls must therefore not be changed.

    Side effects: sets ``mydf.kptij_lst`` and ``mydf.j3c``.  Returns None.
    '''
    max_memory = max(2000, mydf.max_memory-pyscflib.current_memory()[0])
    fused_cell, fuse = df.df.fuse_auxcell(mydf, auxcell)
    log = Logger(mydf.stdout, mydf.verbose)
    nao, nfao = cell.nao_nr(), fused_cell.nao_nr()
    jobs = np.arange(fused_cell.nbas)
    tasks = list(static_partition(jobs))
    ntasks = max(comm.allgather(len(tasks)))
    j3c_junk = ctf.zeros([len(kptij_lst), nao**2, nfao], dtype=np.complex128)

    # time.clock() was removed in Python 3.8.  Keep the legacy clock pair
    # where it still exists and use process_time/perf_counter elsewhere.
    # NOTE(review): assumes Logger.timer measures against the same clocks on
    # the interpreter in use - confirm.
    if hasattr(time, 'clock'):
        t1 = (time.clock(), time.time())
    else:
        t1 = (time.process_time(), time.perf_counter())

    # Flat element indices of j3c_junk, used to address slices in write()
    idx_full = np.arange(j3c_junk.size).reshape(j3c_junk.shape)
    if len(tasks) > 0:
        # The local tasks are treated as one contiguous shell range
        q0, q1 = tasks[0], tasks[-1] + 1
        shls_slice = (0, cell.nbas, 0, cell.nbas, q0, q1)
        bstart, bend = fused_cell.ao_loc_nr()[q0], fused_cell.ao_loc_nr()[q1]
        idx = idx_full[:,:,bstart:bend].ravel()
        tmp = df.incore.aux_e2(cell, fused_cell, intor='int3c2e', aosym='s2',
                               kptij_lst=kptij_lst, shls_slice=shls_slice)
        nao_pair = nao**2
        if tmp.shape[-2] != nao_pair and tmp.ndim == 2:
            tmp = pyscflib.unpack_tril(tmp, axis=0).reshape(nao_pair,-1)
        j3c_junk.write(idx, tmp.ravel())
    else:
        j3c_junk.write([],[])

    t1 = log.timer('j3c_junk', *t1)

    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = pyscflib.cartesian_prod([np.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    mydf.kptij_lst = kptij_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    jobs = np.arange(len(uniq_kpts))
    tasks = list(static_partition(jobs))
    ntasks = max(comm.allgather(len(tasks)))

    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    j2c = ctf.zeros([len(uniq_kpts),naux,naux], dtype=np.complex128)

    a = cell.lattice_vectors() / (2*np.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = np.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = np.rint(kdif)
        mask = np.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = np.where(mask)[0]
        return uniq_kptji_ids

    def cholesky_decomposed_metric(j2c_kptij):
        '''Cholesky-decompose the metric; fall back to an eigenvalue
        decomposition, dropping eigenvalues not above
        mydf.linear_dep_threshold, when Cholesky fails.'''
        j2c_negative = None
        try:
            j2c_kptij = scipy.linalg.cholesky(j2c_kptij, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            w, v = scipy.linalg.eigh(j2c_kptij)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], np.count_nonzero(w<mydf.linear_dep_threshold))
            # Rows of discarded eigenvectors stay zero so that the shape of
            # the result does not depend on how many are discarded.
            v1 = np.zeros(v.T.shape, dtype=v.dtype)
            v1[w>mydf.linear_dep_threshold,:] = v[:,w>mydf.linear_dep_threshold].conj().T
            v1[w>mydf.linear_dep_threshold,:] /= np.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c_kptij = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = np.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = np.zeros(v1.shape, dtype=v1.dtype)
                    j2c_negative[idx,:] = (v[:,idx]/np.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c_kptij, j2c_negative, j2ctag

    # ---- 2-centre metric for every distinct kptj - kpti ----
    for itask in range(ntasks):
        if itask >= len(tasks):
            j2c.write([],[])
            continue
        k = tasks[itask]
        kpt = uniq_kpts[k]
        j2ctmp = np.asarray(fused_cell.pbc_intor('int2c2e', hermi=1, kpts=kpt))
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in pyscflib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            if is_zero(kpt):
                j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T).real
                j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T
            else:
                j2ctmp[naux:] -= np.dot(aoaux[naux:].conj()*coulG[p0:p1].conj(), aoaux.T)
                j2ctmp[:naux,naux:] = j2ctmp[naux:,:naux].T.conj()

        tmp = fuse(fuse(j2ctmp).T).T
        idx = k * naux**2 + np.arange(naux**2)
        j2c.write(idx, tmp.ravel())
        j2ctmp = tmp = None

    coulG = None
    t1 = log.timer('j2c', *t1)

    # ---- fitted 3-index tensor, one k-point pair per task ----
    j3c = ctf.zeros([len(kpt_ji),nao,nao,naux], dtype=np.complex128)
    jobs = np.arange(len(kpt_ji))
    tasks = list(static_partition(jobs))
    ntasks = max(comm.allgather(len(tasks)))

    for itask in range(ntasks):
        if itask >= len(tasks):
            j2c_ji = j2c.read([])
            j3ctmp = j3c_junk.read([])
            j3c.write([],[])
            continue
        idx_ji = tasks[itask]
        kpti, kptj = kptij_lst[idx_ji]
        idxi, idxj = member(kpti, mydf.kpts), member(kptj, mydf.kpts)
        uniq_idx = uniq_inverse[idx_ji]
        kpt = uniq_kpts[uniq_idx]
        # Pick the smallest index among the metrics related to this one by
        # momentum conservation; 'conj' tells whether it came from the
        # second (+kpt) group, in which case its conjugate is used.
        id_eq = kconserve_indices(-kpt)
        id_conj = kconserve_indices(kpt)
        id_conj = np.asarray([i for i in id_conj if i not in id_eq], dtype=int)
        id_full = np.hstack((id_eq, id_conj))
        map_id, conj = min(id_full), np.argmin(id_full) >=len(id_eq)
        j2cidx = map_id * naux**2 + np.arange(naux**2)
        j2c_ji = j2c.read(j2cidx).reshape(naux, naux) # read to be added
        j2c_ji, j2c_negative, j2ctag = cholesky_decomposed_metric(j2c_ji)
        if conj:
            j2c_ji = j2c_ji.conj()
        shls_slice= (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        j3c_id = idx_ji * nao**2*nfao + np.arange(nao**2*nfao)
        j3ctmp = j3c_junk.read(j3c_id).reshape(nao**2, fused_cell.nao_nr()).T
        if is_zero(kpt):  # kpti == kptj
            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj)
                for i in np.where(vbar != 0)[0]:
                    j3ctmp[i] -= vbar[i] * ovlp.reshape(-1)

        aoao = ft_ao._ft_aopair_kpts(cell, Gv, None, 's1', b, gxyz, Gvbase, kpt, kptj)[0].reshape(len(Gv),-1)
        j3ctmp[naux:] -= np.dot(Gaux.T.conj(), aoao)
        j3ctmp = fuse(j3ctmp)
        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c_ji, j3ctmp, lower=True, overwrite_b=True)
        else:
            v = np.dot(j2c_ji, j3ctmp)
        v = v.T.reshape(nao,nao,naux)
        j3c_id = idx_ji * nao**2*naux + np.arange(nao**2*naux)
        j3c.write(j3c_id, v.ravel())

    mydf.j3c = j3c
    return None
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Generate the fitted 3-index integrals for all k-point pairs in
    ``kptij_lst`` and write them to the HDF5 file ``cderi_file``.

    The raw 'int3c2e' integrals and the 2-centre metrics are kept in a
    temporary swap file located next to ``cderi_file``.  The output file
    receives ``'j3c-kptij'`` and, per pair ``ji`` and AO block ``istep``,
    ``'j3c/<ji>/<istep>'`` (plus ``'j3c-/<ji>/<istep>'`` when the metric has
    a negative part in low-dimensional systems).

    Args:
        mydf: density-fitting object (``mesh``, ``max_memory``,
            ``linear_dep_threshold``, ``weighted_coulG``, ``auxbar`` are used).
        cell: the cell of the orbital basis.
        auxcell: the cell of the auxiliary basis.
        kptij_lst: array of k-point pairs, indexed as ``[:, 0]`` / ``[:, 1]``.
        cderi_file: path of the output file; it is opened with mode 'w'.
    '''
    # time.clock() no longer exists on Python >= 3.8.  Use the clock helpers
    # of the logger module when it provides them and fall back to the legacy
    # pair only on installations that lack them.
    cpu_clock = getattr(logger, 'process_clock', None) or time.clock
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # Create swap file to avoid huge cderi_file. see also function
    # pyscf.pbc.df.df._make_j3c
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    # Subtract the plane-wave contribution from the metric of each distinct
    # kptj - kpti and park the result in the swap file.
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        LkR = numpy.asarray(aoaux.real, order='C')
        LkI = numpy.asarray(aoaux.imag, order='C')

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(LkR*coulG, LkR.T)
            j2c_k -= lib.dot(LkI*coulG, LkI.T)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(LkR*coulG, LkI*coulG, LkR.T, LkI.T)
            j2c_k -= j2cR + j2cI * 1j
        fswap['j2c/%d'%k] = j2c_k
        aoaux = LkR = LkI = j2cR = j2cI = coulG = None
    j2c = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Return (decomposed metric, negative part or None, tag) for the
        metric stored under the given index.'''
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        # Note large difference may be found in results between the CD/eig treatments.
        # In some systems, small integral errors can lead to different treatments of
        # linear dependency which can be observed in the total energy/orbital energy
        # around 4th decimal place.
        # A Cholesky attempt used to precede the code below.
        # Abandon CD treatment for better numerical stability
        w, v = scipy.linalg.eigh(j2c)
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  uniq_kptji_id, w[-1]/w[0],
                  numpy.count_nonzero(w<mydf.linear_dep_threshold))
        v1 = v[:,w>mydf.linear_dep_threshold].T.conj()
        v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
        j2c = v1
        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
            idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
            if len(idx) > 0:
                j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
        j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])
    def make_kpt(uniq_kptji_id, cholesky_j2c):  # kpt = kptj - kpti
        '''Produce the fitted integrals of all pairs sharing one
        kptj - kpti, using an already decomposed metric.'''
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            '''Subtract the plane-wave part from one AO block, apply the
            metric and write the block to the output file.'''
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        # The blocks are read from the swap file here and handed to
        # pw_contract through lib.call_in_background.
        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]
                    v = fuse(numpy.vstack(v))
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals. We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    done = numpy.zeros(len(uniq_kpts), dtype=bool)
    for k, kpt in enumerate(uniq_kpts):
        if done[k]:
            continue

        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
        cholesky_j2c = cholesky_decomposed_metric(k)

        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
        # symmetry S = S
        uniq_kptji_ids = kconserve_indices(-kpt)
        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
        # symmetry S = S*
        uniq_kptji_ids = kconserve_indices(kpt)
        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for %s", uniq_kptji_ids)
        cholesky_j2c = conj_j2c(cholesky_j2c)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

    feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Generate the fitted 3-index integrals for all k-point pairs in
    ``kptij_lst`` inside the HDF5 file ``cderi_file``.

    ``outcore.aux_e2`` first writes the raw 'int3c2e_sph' integrals to the
    file as ``'j3c/<pair>'``.  The datasets are then corrected block by
    block (plane-wave part subtracted, metric applied) and finally replaced
    by their leading ``naux0`` rows, ``naux0`` being the number of rows of
    the decomposed metric.  The 2-centre metrics are stored temporarily
    under ``'j2c/<id>'`` and removed again.

    Args:
        mydf: density-fitting object (``gs``, ``max_memory``,
            ``linear_dep_threshold``, ``weighted_coulG``, ``auxbar`` are used).
        cell: the cell of the orbital basis.
        auxcell: the cell of the auxiliary basis.
        kptij_lst: array of k-point pairs, indexed as ``[:, 0]`` / ``[:, 1]``.
        cderi_file: path of the HDF5 file to fill.
    '''
    # time.clock() no longer exists on Python >= 3.8.  Use the clock helpers
    # of the logger module when it provides them and fall back to the legacy
    # pair only on installations that lack them.
    cpu_clock = getattr(logger, 'process_clock', None) or time.clock
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, cderi_file, 'int3c2e_sph', aosym='s2',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)
    # The file already holds the integrals written above and is modified
    # below, so it must be opened read/write.  'a' is what h5py used when no
    # mode was given before version 3.0; newer versions default to
    # read-only, which makes the writes below fail.
    feri = h5py.File(cderi_file, 'a')

    # An alternative method to evaluate j2c (through a modified charge basis
    # built from mydf.eta) was tried here. This method might have larger
    # numerical error?
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        feri['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
        aoaux = LkR = LkI = coulG = None
    j2c = None

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        '''Correct, in place, the integrals of all pairs sharing one
        kptj - kpti.'''
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            # A metric that is not positive definite is not treated as an
            # error: fall back to an eigenvalue decomposition and drop the
            # eigenvalues not above mydf.linear_dep_threshold.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v = v[:, w > mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v
            j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngs, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    # Only the rows from naux onwards receive the
                    # plane-wave correction.
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        del (feri['j2c/%d' % uniq_kptji_id])
        # Replace each dataset by its leading naux0 rows.
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the 3-index DF tensor for all k-point pairs and store it in the
    HDF5 file ``cderi_file`` under ``j3c/<pair index>``.

    The 3-center integrals are first written to ``cderi_file`` by
    ``outcore.aux_e2``.  For every unique k-point difference kptj-kpti, the
    Fourier-space (``ft_ao``) contribution weighted by ``weighted_coulG`` is
    subtracted from the 2-center metric and from the 3-center integrals.  The
    corrected 3-center integrals are then contracted with the
    eigen-decomposed metric and written back in place.

    Args:
        mydf : DF object.  ``stdout``, ``verbose``, ``max_memory``, ``gs`` and
            ``linear_dep_threshold`` are read from it; ``weighted_coulG`` and
            ``auxbar`` are called on it.
        cell : cell providing the AO basis.
        auxcell : cell providing the auxiliary basis.
        kptij_lst : array whose ``[:,0]`` and ``[:,1]`` slices are kpti and
            kptj of each pair.
        cderi_file : name of the HDF5 file that holds both the intermediates
            and the final integrals.
    '''
    # time.clock() was removed in Python 3.8; use the clocks of the logger
    # module so that the values are consistent with log.timer_debug1.
    t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, cderi_file, 'int3c2e_sph', aosym='s2',
                   kptij_lst=kptij_lst, dataname='j3c',
                   max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)

    # The file was just written by outcore.aux_e2 and is updated below.  The
    # mode must be explicit: h5py >= 3 opens files read-only by default.
    feri = h5py.File(cderi_file, 'a')
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(kLR.T, kLR)
            j2c_k -= lib.dot(kLI.T, kLI)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c_k -= j2cR + j2cI * 1j
        feri['j2c/%d' % k] = j2c_k
        # Release the large temporaries before the next k-point
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None
    j2c = None

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        '''Finalize j3c for all k-point pairs with the same kptj-kpti.'''
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, gs)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])

        # Note large difference may be found in results between the CD/eig
        # treatments.  In some systems, small integral errors can lead to
        # different treatments of linear dependency which can be observed in
        # the total energy/orbital energy around 4th decimal place.  The CD
        # treatment is abandoned here for better numerical stability; only
        # the eigen-decomposition is used.
        w, v = scipy.linalg.eigh(j2c)
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  uniq_kptji_id, w[-1] / w[0],
                  numpy.count_nonzero(w < mydf.linear_dep_threshold))
        v = v[:, w > mydf.linear_dep_threshold].T.conj()
        v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
        j2c = v
        j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    # No imaginary part is kept for this pair
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngs, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice,
                                            aosym, b, gxyz[p0:p1], Gvbase,
                                            kpt, adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        del (feri['j2c/%d' % uniq_kptji_id])
        # Keep only the first naux0 rows of each dataset
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the 3-index DF tensor for all k-point pairs and store it in the
    HDF5 file ``cderi_file``.

    The 3-center integrals are first dumped by ``outcore._aux_e2`` into a
    temporary HDF5 file located in the directory of ``cderi_file``.  The
    2-center metric of every unique k-point difference kptj-kpti is corrected
    by its Fourier-space (``ft_ao``) contribution and decomposed (Cholesky,
    falling back to an eigen-decomposition).  Finally the corrected 3-center
    integrals are contracted with the decomposed metric and written to
    ``cderi_file`` as ``j3c/<pair index>/<step>`` (and ``j3c-/...`` when a
    negative-metric block exists).

    Args:
        mydf : DF object.  ``stdout``, ``verbose``, ``max_memory``, ``mesh``
            and ``linear_dep_threshold`` are read from it; ``weighted_coulG``
            and ``auxbar`` are called on it.
        cell : cell providing the AO basis.
        auxcell : cell providing the auxiliary basis.
        kptij_lst : array whose ``[:,0]`` and ``[:,1]`` slices are kpti and
            kptj of each pair.  It is also saved as ``j3c-kptij``.
        cderi_file : name of the output HDF5 file.  It is created with mode
            'w', i.e. an existing file is truncated.
    '''
    # time.clock() was removed in Python 3.8; use the clocks of the logger
    # module so that the values are consistent with log.timer_debug1.
    t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            # Only the rows [naux:] are corrected; the block [:naux,naux:] is
            # then restored from the (conjugate) transpose.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Decompose the stored metric j2c of one unique k-point difference.

        Returns (j2c, j2c_negative, j2ctag).  j2ctag is 'CD' when the Cholesky
        factorization succeeded and 'eig' when the eigen-decomposition
        fallback was used.  j2c_negative is None unless the fallback found
        eigenvalues below -linear_dep_threshold in a 2D cell whose
        low_dim_ft_type is not 'inf_vacuum'.
        '''
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite.  Do not raise; remove the
            # linearly dependent part with an eigen-decomposition instead.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Finalize j3c for all k-point pairs with the same kptj-kpti, using
        the decomposed metric ``cholesky_j2c``.'''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            '''Subtract the Fourier-space part from the rows [naux:] of
            j3cR/j3cI, contract with the metric and write step ``istep``.'''
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        # No imaginary part is kept for this pair
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        # The intermediates of these pairs are not needed any more
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        '''Complex conjugate of a decomposed metric returned by
        cholesky_decomposed_metric.'''
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    done = numpy.zeros(len(uniq_kpts), dtype=bool)
    for k, kpt in enumerate(uniq_kpts):
        if done[k]:
            continue

        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
        cholesky_j2c = cholesky_decomposed_metric(k)

        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
        # symmetry S = S
        uniq_kptji_ids = kconserve_indices(-kpt)
        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
        # symmetry S = S*
        uniq_kptji_ids = kconserve_indices(kpt)
        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for %s", uniq_kptji_ids)
        cholesky_j2c = conj_j2c(cholesky_j2c)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

    feri.close()
def wrap_int3c(cell, auxcell, intor='int3c2e', aosym='s1', comp=1,
               kptij_lst=numpy.zeros((1, 2, 3)), cintopt=None, pbcopt=None):
    '''Prepare the data for libpbc.PBCnr3c_drv and return a function
    ``int3c(shls_slice, out)`` which evaluates the 3-center integrals of the
    given shell ranges into ``out`` and returns ``out``.

    Args:
        cell : cell providing the AO basis (used for the first two indices).
        auxcell : cell providing the basis of the third index.
        intor : integral name; the suffix is added by ``cell._add_suffix``.
        aosym : only the first two characters are used to select the fill
            function.
        comp : number of integral components passed to the driver.
        kptij_lst : array whose ``[:,0]`` and ``[:,1]`` slices are kpti and
            kptj of each pair.
        cintopt : integral optimizer; created here when None.
        pbcopt : ``_pbcintor.PBCOpt`` object; created here when None.  Any
            other type results in a NULL pointer being passed to the driver.
    '''
    intor = cell._add_suffix(intor)

    # Environment of cell + cell; it is also installed in the copy of cell
    # that is handed to PBCOpt.
    atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env,
                                 cell._atm, cell._bas, cell._env)
    pcell = copy.copy(cell)
    pcell._atm, pcell._bas, pcell._env = atm, bas, env

    # AO offsets of cell + cell followed by the offsets of auxcell
    cell_loc = gto.moleintor.make_loc(bas, intor)
    aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor)
    ao_loc = numpy.asarray(numpy.hstack([cell_loc, cell_loc[-1] + aux_loc[1:]]),
                           dtype=numpy.int32)
    atm, bas, env = gto.conc_env(atm, bas, env,
                                 auxcell._atm, auxcell._bas, auxcell._env)

    Ls = cell.get_lattice_Ls()
    nimgs = len(Ls)

    kpti = kptij_lst[:, 0]
    kptj = kptij_lst[:, 1]
    if gamma_point(kptij_lst):
        fill_tag = 'g'
        nkpts = nkptij = 1
        kptij_idx = numpy.array([0], dtype=numpy.int32)
        expkL = numpy.ones(1)
    elif is_zero(kpti - kptj):  # j_only
        fill_tag = 'k'
        kpts = kptij_idx = numpy.asarray(kpti, order='C')
        expkL = numpy.exp(1j * numpy.dot(kpts, Ls.T))
        nkpts = nkptij = len(kpts)
    else:
        fill_tag = 'kk'
        kpts = unique(numpy.vstack([kpti, kptj]))[0]
        expkL = numpy.exp(1j * numpy.dot(kpts, Ls.T))
        # Position of every kpti / kptj in the list of unique k-points
        dist_i = abs(kpti.reshape(-1, 1, 3) - kpts).sum(axis=2)
        dist_j = abs(kptj.reshape(-1, 1, 3) - kpts).sum(axis=2)
        wherei = numpy.where(dist_i < KPT_DIFF_TOL)[1]
        wherej = numpy.where(dist_j < KPT_DIFF_TOL)[1]
        nkpts = len(kpts)
        kptij_idx = numpy.asarray(wherei * nkpts + wherej, dtype=numpy.int32)
        nkptij = len(kptij_lst)

    fill = 'PBCnr3c_fill_%s%s' % (fill_tag, aosym[:2])
    driver = libpbc.PBCnr3c_drv

    if cintopt is None:
        cintopt = _vhf.make_cintopt(atm, bas, env, intor)
        # Remove the precomputed pair data because the pair data corresponds
        # to the integral of cell #0 while the lattice sum moves shls to all
        # repeated images.
        # NOTE(review): the collapsed source does not show whether this check
        # is nested under `cintopt is None`; it is nested here -- confirm.
        if intor[:3] != 'ECP':
            libpbc.CINTdel_pairdata_optimizer(cintopt)

    if pbcopt is None:
        pbcopt = _pbcintor.PBCOpt(pcell).init_rcut_cond(pcell)
    cpbcopt = (pbcopt._this if isinstance(pbcopt, _pbcintor.PBCOpt)
               else lib.c_null_ptr())

    nbas = cell.nbas

    def int3c(shls_slice, out):
        # Shell indices refer to the concatenated (cell, cell, auxcell)
        # environment: shift the second pair by nbas, the third by 2*nbas.
        shifted = (shls_slice[0], shls_slice[1],
                   shls_slice[2] + nbas, shls_slice[3] + nbas,
                   shls_slice[4] + nbas * 2, shls_slice[5] + nbas * 2)
        as_ptr = ctypes.c_void_p
        driver(getattr(libpbc, intor), getattr(libpbc, fill),
               out.ctypes.data_as(as_ptr),
               ctypes.c_int(nkptij), ctypes.c_int(nkpts),
               ctypes.c_int(comp), ctypes.c_int(nimgs),
               Ls.ctypes.data_as(as_ptr),
               expkL.ctypes.data_as(as_ptr),
               kptij_idx.ctypes.data_as(as_ptr),
               (ctypes.c_int * 6)(*shifted),
               ao_loc.ctypes.data_as(as_ptr), cintopt, cpbcopt,
               atm.ctypes.data_as(as_ptr), ctypes.c_int(cell.natm),
               bas.ctypes.data_as(as_ptr),
               ctypes.c_int(nbas),  # need to pass cell.nbas to libpbc.PBCnr3c_drv
               env.ctypes.data_as(as_ptr), ctypes.c_int(env.size))
        return out

    return int3c
def _aux_e2(cell, auxcell, erifile, intor='int3c2e', aosym='s2ij', comp=None,
            kptij_lst=None, dataname='eri_mo', shls_slice=None,
            max_memory=2000, verbose=0):
    r'''3-center AO integrals (ij|L) with double lattice sum:
    \sum_{lm} (i[l]j[m]|L[0]), where L is the auxiliary basis.
    Three-index integral tensor (kptij_idx, nao_pair, naux) or four-index
    integral tensor (kptij_idx, comp, nao_pair, naux) are stored on disk.

    **This function should be only used by df and mdf initialization function
    _make_j3c**

    The integrals are saved as datasets ``<dataname>/<pair index>/<step>``,
    one step per segment of auxiliary shells.  ``kptij_lst`` is saved as
    ``<dataname>-kptij``.  Existing entries with these names are deleted
    first.

    Args:
        erifile : h5py.Group or file name.  A group is used as is and left
            open.  An existing HDF5 file is opened for update, anything else
            is created (mode 'w'); in both cases the file is closed at the
            end.
        kptij_lst : (*,2,3) array
            A list of (kpti, kptj)
        max_memory : used to size the integral buffers.
        verbose : not used.

    Returns:
        ``erifile`` as it was passed in.
    '''
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor), comp)

    if isinstance(erifile, h5py.Group):
        feri = erifile
    elif h5py.is_hdf5(erifile):
        # Mode 'a' must be explicit: h5py >= 3 opens files read-only by
        # default, which breaks the writes below.
        feri = h5py.File(erifile, 'a')
    else:
        feri = h5py.File(erifile, 'w')

    if dataname in feri:
        del(feri[dataname])
    if dataname+'-kptij' in feri:
        del(feri[dataname+'-kptij'])

    if kptij_lst is None:
        kptij_lst = numpy.zeros((1,2,3))
    feri[dataname+'-kptij'] = kptij_lst

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)

    ao_loc = cell.ao_loc_nr()
    aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor)[:shls_slice[5]+1]
    ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
    nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
    nkptij = len(kptij_lst)

    nii = (ao_loc[shls_slice[1]]*(ao_loc[shls_slice[1]]+1)//2 -
           ao_loc[shls_slice[0]]*(ao_loc[shls_slice[0]]+1)//2)
    nij = ni * nj

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    # Pairs with kpti == kptj; they are stored in packed form for aosym s2
    aosym_ks2 = abs(kptis-kptjs).sum(axis=1) < KPT_DIFF_TOL
    j_only = numpy.all(aosym_ks2)
    aosym_ks2 &= aosym[:2] == 's2'

    if j_only and aosym[:2] == 's2':
        assert(shls_slice[2] == 0)
        nao_pair = nii
    else:
        nao_pair = nij

    if gamma_point(kptij_lst):
        dtype = numpy.double
    else:
        dtype = numpy.complex128

    buflen = max(8, int(max_memory*.47e6/16/(nkptij*ni*nj*comp)))
    auxdims = aux_loc[shls_slice[4]+1:shls_slice[5]+1] - aux_loc[shls_slice[4]:shls_slice[5]]
    auxranges = balance_segs(auxdims, buflen)
    buflen = max([x[2] for x in auxranges])
    # Two buffers: one is filled while the other is being saved in background
    buf = numpy.empty(nkptij*comp*ni*nj*buflen, dtype=dtype)
    buf1 = numpy.empty_like(buf)

    int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst)

    kpt_ji = kptjs - kptis
    uniq_kpts, _, uniq_inverse = unique(kpt_ji)
    # sorted_ij_idx: Sort and group the kptij_lst according to the ordering in
    # df._make_j3c to reduce the data fragment in the hdf5 file.  When datasets
    # are written to hdf5, they are saved sequentially. If the integral data are
    # saved as the order of kptij_lst, removing the datasets in df._make_j3c will
    # lead to holes that can not be reused.
    sorted_ij_idx = numpy.hstack([numpy.where(uniq_inverse == k)[0]
                                  for k in range(len(uniq_kpts))])
    tril_idx = numpy.tril_indices(ni)
    tril_idx = tril_idx[0] * ni + tril_idx[1]

    def save(istep, mat):
        for k in sorted_ij_idx:
            v = mat[k]
            if gamma_point(kptij_lst[k]):
                v = v.real
            if aosym_ks2[k] and nao_pair == ni**2:
                # Square storage was computed; keep the lower triangle only
                v = v[:,tril_idx]
            feri['%s/%d/%d' % (dataname,k,istep)] = v

    with lib.call_in_background(save) as bsave:
        for istep, auxrange in enumerate(auxranges):
            sh0, sh1, nrow = auxrange
            sub_slice = (shls_slice[0], shls_slice[1],
                         shls_slice[2], shls_slice[3],
                         shls_slice[4]+sh0, shls_slice[4]+sh1)
            mat = numpy.ndarray((nkptij,comp,nao_pair,nrow), dtype=dtype, buffer=buf)
            bsave(istep, int3c(sub_slice, mat))
            buf, buf1 = buf1, buf

    if not isinstance(erifile, h5py.Group):
        feri.close()
    return erifile
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform the 2e integrals to the MO basis for all k-point
    combinations and return them as a 7-index array
    ``out[ki,kj,kk]`` of shape (nmoi,nmoj,nmok,nmol).

    For each (ki,kj,kk) the fourth k-point is taken from
    ``kpts_helper.get_kconserv``.  Each block is the sum of a part evaluated
    through ``mydf.ft_loop`` (weighted by ``mydf.weighted_coulG``) and a part
    accumulated from the 3-index tensors yielded by ``mydf.sr_loop``.

    Args:
        mydf : DF object.  ``cell``, ``kpts``, ``mesh``, ``max_memory`` and
            ``blockdim`` are read from it.
        mo_coeff_kpts : a 3-index ndarray (used for all four orbital sets) or
            a sequence of four such arrays.
        kpts : k-points; ``mydf.kpts`` when None.
        factor : scaling factor applied to both contributions.
        out : optional output array of shape
            (nkpts,nkpts,nkpts,nmoi,nmoj,nmok,nmol).

    Returns:
        The array ``out``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    if isinstance(mo_coeff_kpts, numpy.ndarray) and mo_coeff_kpts.ndim == 3:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert(out.shape == eri_shape)

    kptij_lst = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kptis_lst = kptij_lst[:,0]
    kptjs_lst = kptij_lst[:,1]
    kpt_ji = kptjs_lst - kptis_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    ngrids = numpy.prod(mydf.mesh)
    nao = cell.nao_nr()
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6) * .5

    fswap = lib.H5TmpFile()
    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id, q in enumerate(uniq_kpts):
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_id)[0]
        kptjs = kptjs_lst[adapted_ji_idx]
        coulG = mydf.weighted_coulG(q, False, mydf.mesh)
        coulG *= factor

        moij_list = []
        ijslice_list = []
        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts
            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            moij_list.append(moij)
            ijslice_list.append(ijslice)
            fswap.create_dataset('zij/'+str(ji), (ngrids,nmoi*nmoj), 'D')

        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, kptjs,
                                           max_memory=max_memory):
            for ji, aoao in enumerate(aoaoks):
                # ft_loop yields the grid points p0:p1 only.  The block has
                # p1-p0 points, not ngrids.
                buf = aoao.transpose(1,2,0).reshape(nao**2,p1-p0)
                zij = _ao2mo.r_e2(lib.transpose(buf), moij_list[ji],
                                  ijslice_list[ji], tao, ao_loc)
                zij *= coulG[p0:p1,None]
                fswap['zij/'+str(ji)][p0:p1] = zij

        # The kl orbitals and the k-points passed to ft_loop below must refer
        # to the same (ki,kj) pair.  Take both from the first pair of this
        # group instead of relying on loop variables of the block above.
        ki0 = adapted_ji_idx[0] // nkpts
        kj0 = adapted_ji_idx[0] % nkpts
        kls = kconserv[ki0, kj0, :]
        mokl_list = []
        klslice_list = []
        for kk in range(nkpts):
            mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk], mo_coeff_kpts[3][kls[kk]])[2:]
            mokl_list.append(mokl)
            klslice_list.append(klslice)
            fswap.create_dataset('zkl/'+str(kk), (ngrids,nmok*nmol), 'D')

        kptls = kpts[kls]
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, -kptls,
                                           max_memory=max_memory):
            for kk, aoao in enumerate(aoaoks):
                buf = aoao.conj().transpose(1,2,0).reshape(nao**2,p1-p0)
                zkl = _ao2mo.r_e2(lib.transpose(buf), mokl_list[kk],
                                  klslice_list[kk], tao, ao_loc)
                fswap['zkl/'+str(kk)][p0:p1] = zkl

        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts

            moij = moij_list[ji]
            ijslice = ijslice_list[ji]
            zij = []
            for LpqR, LpqI, sign in mydf.sr_loop(kpts[[ki,kj]], max_memory, False, mydf.blockdim):
                zij.append(_ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc))

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                # Start from the ft_loop part, then accumulate the sr_loop part
                eri_mo = lib.dot(numpy.asarray(fswap['zij/'+str(ji)]).T,
                                 numpy.asarray(fswap['zkl/'+str(kk)]))

                for i, (LrsR, LrsI, sign) in \
                        enumerate(mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl_list[kk],
                                      klslice_list[kk], tao, ao_loc)
                    lib.dot(zij[i].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])

        # The datasets are created again for the next unique k-point
        del(fswap['zij'])
        del(fswap['zkl'])

    return out
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform the DF 2e integrals to the MO basis for all k-point
    combinations.

    The block ``out[ki,kj,kk]`` of shape (nmoi,nmoj,nmok,nmol) is accumulated
    from the 3-index tensors yielded by ``mydf.sr_loop``; the fourth k-point
    is taken from ``kpts_helper.get_kconserv``.

    Args:
        mydf : DF object.  ``cell``, ``kpts``, ``max_memory`` and
            ``blockdim`` are read from it.
        mo_coeff_kpts : a 3-index ndarray (used for all four orbital sets) or
            a sequence of four such arrays.
        kpts : k-points; ``mydf.kpts`` when None.
        factor : scaling factor of the integrals.
        out : optional output array of shape
            (nkpts,nkpts,nkpts,nmoi,nmoj,nmok,nmol).

    Returns:
        The array ``out``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    same_mo_for_all = (isinstance(mo_coeff_kpts, numpy.ndarray)
                       and mo_coeff_kpts.ndim == 3)
    if same_mo_for_all:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)
    mo_i, mo_j, mo_k, mo_l = mo_coeff_kpts

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [c.shape[2] for c in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert(out.shape == eri_shape)

    # Group the (ki,kj) pairs by kj-ki; pair index = ki*nkpts + kj
    pairs = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kpt_ji = pairs[:,1] - pairs[:,0]
    uniq_kpts, _, uniq_inverse = unique(kpt_ji)

    nao = cell.nao_nr()
    mem_avail = mydf.max_memory - lib.current_memory()[0] - nao**4*16/1e6
    max_memory = max(2000, mem_avail) * .5

    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id in range(len(uniq_kpts)):
        for pair_idx in numpy.where(uniq_inverse == uniq_id)[0]:
            ki = pair_idx // nkpts
            kj = pair_idx % nkpts

            moij, ijslice = _conc_mos(mo_i[ki], mo_j[kj])[2:]
            # Half-transformed tensors of the (ki,kj) pair, one per block
            zij = [_ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc)
                   for LpqR, LpqI, sign
                   in mydf.sr_loop(kpts[[ki,kj]], max_memory, False, mydf.blockdim)]

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                mokl, klslice = _conc_mos(mo_k[kk], mo_l[kl])[2:]
                eri_mo = numpy.zeros((nmoi*nmoj,nmok*nmol), dtype=numpy.complex128)
                blocks = mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)
                for iblk, (LrsR, LrsI, sign) in enumerate(blocks):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl, klslice, tao, ao_loc)
                    lib.dot(zij[iblk].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])
    return out
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Transform the 2e integrals to the MO basis for all k-point
    combinations and return them as a 7-index array
    ``out[ki,kj,kk]`` of shape (nmoi,nmoj,nmok,nmol).

    For each (ki,kj,kk) the fourth k-point is taken from
    ``kpts_helper.get_kconserv``.  Each block is the sum of a part evaluated
    through ``mydf.ft_loop`` (weighted by ``mydf.weighted_coulG``) and a part
    accumulated from the 3-index tensors yielded by ``mydf.sr_loop``.

    Args:
        mydf : DF object.  ``cell``, ``kpts``, ``mesh``, ``max_memory`` and
            ``blockdim`` are read from it.
        mo_coeff_kpts : a 3-index ndarray (used for all four orbital sets) or
            a sequence of four such arrays.
        kpts : k-points; ``mydf.kpts`` when None.
        factor : scaling factor applied to both contributions.
        out : optional output array of shape
            (nkpts,nkpts,nkpts,nmoi,nmoj,nmok,nmol).

    Returns:
        The array ``out``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts = mydf.kpts
    nkpts = len(kpts)

    if isinstance(mo_coeff_kpts, numpy.ndarray) and mo_coeff_kpts.ndim == 3:
        mo_coeff_kpts = [mo_coeff_kpts] * 4
    else:
        mo_coeff_kpts = list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [x.shape[2] for x in mo_coeff_kpts]
    eri_shape = (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol)
    if gamma_point(kpts):
        dtype = numpy.result_type(*mo_coeff_kpts)
    else:
        dtype = numpy.complex128

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert (out.shape == eri_shape)

    kptij_lst = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    kptis_lst = kptij_lst[:, 0]
    kptjs_lst = kptij_lst[:, 1]
    kpt_ji = kptjs_lst - kptis_lst
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    ngrids = numpy.prod(mydf.mesh)
    nao = cell.nao_nr()
    max_memory = max(
        2000,
        mydf.max_memory - lib.current_memory()[0] - nao**4 * 16 / 1e6) * .5

    fswap = lib.H5TmpFile()
    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id, q in enumerate(uniq_kpts):
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_id)[0]
        kptjs = kptjs_lst[adapted_ji_idx]
        coulG = mydf.weighted_coulG(q, False, mydf.mesh)
        coulG *= factor

        moij_list = []
        ijslice_list = []
        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts
            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki],
                                      mo_coeff_kpts[1][kj])[2:]
            moij_list.append(moij)
            ijslice_list.append(ijslice)
            fswap.create_dataset('zij/' + str(ji), (ngrids, nmoi * nmoj), 'D')

        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, kptjs,
                                           max_memory=max_memory):
            for ji, aoao in enumerate(aoaoks):
                # Each block holds the grid points p0:p1 only
                buf = aoao.transpose(1, 2, 0).reshape(nao**2, p1 - p0)
                zij = _ao2mo.r_e2(lib.transpose(buf), moij_list[ji],
                                  ijslice_list[ji], tao, ao_loc)
                zij *= coulG[p0:p1, None]
                fswap['zij/' + str(ji)][p0:p1] = zij

        # The kl orbitals and the k-points passed to ft_loop below must refer
        # to the same (ki,kj) pair.  Take both from the first pair of this
        # group instead of relying on loop variables of the block above.
        ki0 = adapted_ji_idx[0] // nkpts
        kj0 = adapted_ji_idx[0] % nkpts
        kls = kconserv[ki0, kj0, :]
        mokl_list = []
        klslice_list = []
        for kk in range(nkpts):
            mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk],
                                      mo_coeff_kpts[3][kls[kk]])[2:]
            mokl_list.append(mokl)
            klslice_list.append(klslice)
            fswap.create_dataset('zkl/' + str(kk), (ngrids, nmok * nmol), 'D')

        kptls = kpts[kls]
        for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, q, -kptls,
                                           max_memory=max_memory):
            for kk, aoao in enumerate(aoaoks):
                buf = aoao.conj().transpose(1, 2, 0).reshape(nao**2, p1 - p0)
                zkl = _ao2mo.r_e2(lib.transpose(buf), mokl_list[kk],
                                  klslice_list[kk], tao, ao_loc)
                fswap['zkl/' + str(kk)][p0:p1] = zkl

        for ji, ji_idx in enumerate(adapted_ji_idx):
            ki = ji_idx // nkpts
            kj = ji_idx % nkpts

            moij = moij_list[ji]
            ijslice = ijslice_list[ji]
            zij = []
            for LpqR, LpqI, sign in mydf.sr_loop(kpts[[ki, kj]], max_memory,
                                                 False, mydf.blockdim):
                zij.append(
                    _ao2mo.r_e2(LpqR + LpqI * 1j, moij, ijslice, tao, ao_loc))

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                # Start from the ft_loop part, then accumulate the sr_loop part
                eri_mo = lib.dot(
                    numpy.asarray(fswap['zij/' + str(ji)]).T,
                    numpy.asarray(fswap['zkl/' + str(kk)]))

                for i, (LrsR, LrsI, sign) in \
                        enumerate(mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)):
                    zkl = _ao2mo.r_e2(LrsR + LrsI * 1j, mokl_list[kk],
                                      klslice_list[kk], tao, ao_loc)
                    lib.dot(zij[i].T, zkl, sign * factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki, kj, kk] = eri_mo.reshape(eri_shape[3:])

        # The datasets are created again for the next unique k-point
        del (fswap['zij'])
        del (fswap['zkl'])

    return out
def _aux_e2(cell, auxcell, erifile, intor='int3c2e', aosym='s2ij', comp=None,
            kptij_lst=None, dataname='eri_mo', shls_slice=None, max_memory=2000,
            verbose=0):
    r'''3-center AO integrals (ij|L) with double lattice sum:
    \sum_{lm} (i[l]j[m]|L[0]), where L is the auxiliary basis.
    Three-index integral tensor (kptij_idx, nao_pair, naux) or four-index
    integral tensor (kptij_idx, comp, nao_pair, naux) are stored on disk.

    **This function should be only used by df and mdf initialization function
    _make_j3c**

    The integrals are written as datasets named
    ``'<dataname>/<kptij index>/<step>'``, one step per batch of auxiliary
    shells, and the k-point pairs are stored under ``'<dataname>-kptij'``.
    Existing entries with these names are deleted first.

    Args:
        cell, auxcell : the orbital-basis cell and the auxiliary-basis cell.
        erifile : an open ``h5py.Group`` (used as is and left open) or a file
            name.  An existing HDF5 file is opened in append mode, anything
            else is created / truncated.  A file opened here is always closed
            before returning, including when an error is raised.
        intor, comp : integral name and number of components, normalised via
            ``gto.moleintor._get_intor_and_comp``.
        aosym : AO-pair symmetry flag; only its first two characters are
            inspected here.
        kptij_lst : (*,2,3) array
            A list of (kpti, kptj).  Defaults to a single zero pair.
        dataname : name prefix of the datasets written to ``erifile``.
        shls_slice : 6-tuple of shell bounds; defaults to all shells of
            ``cell`` (twice) and ``auxcell``.
        max_memory : memory budget in MB used to size the auxiliary batches.
        verbose : not used in this function.

    Returns:
        ``erifile``, exactly as passed in.
    '''
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor), comp)

    # Only a file opened by this function is closed by this function.
    owns_file = not isinstance(erifile, h5py.Group)
    if not owns_file:
        feri = erifile
    elif h5py.is_hdf5(erifile):
        feri = h5py.File(erifile, 'a')
    else:
        feri = h5py.File(erifile, 'w')

    try:
        if dataname in feri:
            del feri[dataname]
        if dataname + '-kptij' in feri:
            del feri[dataname + '-kptij']

        if kptij_lst is None:
            kptij_lst = numpy.zeros((1, 2, 3))
        feri[dataname + '-kptij'] = kptij_lst

        if shls_slice is None:
            shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)

        ao_loc = cell.ao_loc_nr()
        aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor)[:shls_slice[5] + 1]
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nkptij = len(kptij_lst)

        # Number of lower-triangular AO pairs covered by the i-shell range.
        nii = (ao_loc[shls_slice[1]] * (ao_loc[shls_slice[1]] + 1) // 2 -
               ao_loc[shls_slice[0]] * (ao_loc[shls_slice[0]] + 1) // 2)
        nij = ni * nj

        kpti = kptij_lst[:, 0]
        kptj = kptij_lst[:, 1]
        # True for the pairs with kpti == kptj (within KPT_DIFF_TOL).
        aosym_ks2 = abs(kpti - kptj).sum(axis=1) < KPT_DIFF_TOL
        j_only = numpy.all(aosym_ks2)
        aosym_ks2 &= aosym[:2] == 's2'

        # Packed pair storage is used only when every pair is diagonal and
        # s2 symmetry was requested.
        if j_only and aosym[:2] == 's2':
            assert (shls_slice[2] == 0)
            nao_pair = nii
        else:
            nao_pair = nij

        if gamma_point(kptij_lst):
            dtype = numpy.double
        else:
            dtype = numpy.complex128

        # Partition the auxiliary shells into batches that fit the budget.
        buflen = max(8, int(max_memory * .47e6 / 16 / (nkptij * ni * nj * comp)))
        auxdims = (aux_loc[shls_slice[4] + 1:shls_slice[5] + 1] -
                   aux_loc[shls_slice[4]:shls_slice[5]])
        auxranges = balance_segs(auxdims, buflen)
        buflen = max([x[2] for x in auxranges])
        # Two buffers: one is being filled while the other is being saved by
        # the background writer.
        buf = numpy.empty(nkptij * comp * ni * nj * buflen, dtype=dtype)
        buf1 = numpy.empty_like(buf)

        int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst)

        kpt_ji = kptj - kpti
        uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
        # sorted_ij_idx: Sort and group the kptij_lst according to the
        # ordering in df._make_j3c to reduce the data fragment in the hdf5
        # file.  When datasets are written to hdf5, they are saved
        # sequentially.  If the integral data are saved as the order of
        # kptij_lst, removing the datasets in df._make_j3c will lead to holes
        # that can not be reused.
        sorted_ij_idx = numpy.hstack(
            [numpy.where(uniq_inverse == k)[0] for k in range(len(uniq_kpts))])
        tril_idx = numpy.tril_indices(ni)
        tril_idx = tril_idx[0] * ni + tril_idx[1]

        def save(istep, mat):
            '''Write one auxiliary batch for every k-point pair.'''
            for k in sorted_ij_idx:
                v = mat[k]
                if gamma_point(kptij_lst[k]):
                    v = v.real
                # Diagonal pair computed with full ni*ni storage: keep only
                # the lower-triangular columns.
                if aosym_ks2[k] and nao_pair == ni**2:
                    v = v[:, tril_idx]
                feri['%s/%d/%d' % (dataname, k, istep)] = v

        with lib.call_in_background(save) as bsave:
            for istep, auxrange in enumerate(auxranges):
                sh0, sh1, nrow = auxrange
                sub_slice = (shls_slice[0], shls_slice[1],
                             shls_slice[2], shls_slice[3],
                             shls_slice[4] + sh0, shls_slice[4] + sh1)
                mat = numpy.ndarray((nkptij, comp, nao_pair, nrow),
                                    dtype=dtype, buffer=buf)
                bsave(istep, int3c(sub_slice, mat))
                buf, buf1 = buf1, buf
    finally:
        if owns_file:
            feri.close()
    return erifile
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index tensors and write them to
    ``cderi_file``.

    Steps, as implemented below:

    1. ``outcore._aux_e2`` writes the 3-center integrals over the fused
       auxiliary cell to a temporary HDF5 file (``'j3c-junk'``).
    2. For each unique k-point difference kptj - kpti, the 2-center metric
       ``j2c`` is corrected with Fourier-space contributions and stored in the
       temporary file.
    3. ``make_kpt`` decomposes the metric (Cholesky, or an eigen-decomposition
       that discards eigenvalues at or below ``mydf.linear_dep_threshold``
       when Cholesky fails), applies the Fourier-space correction to the
       3-center integrals and writes ``'j3c/<pair>/<step>'`` to ``cderi_file``.

    Args:
        mydf : the density-fitting object (provides ``mesh``, ``max_memory``,
            ``weighted_coulG``, ``auxbar``, ``linear_dep_threshold``,
            ``stdout`` and ``verbose``).
        cell, auxcell : the orbital-basis cell and the auxiliary-basis cell.
        kptij_lst : array of (kpti, kptj) pairs; also saved as ``'j3c-kptij'``.
        cderi_file : name of the output HDF5 file.  It is created / truncated,
            and closed even when the calculation raises.
    '''
    # time.clock() no longer exists on Python >= 3.8; use the logger's clocks,
    # which are the ones the logger timers are meant to be paired with.
    t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    if cell.dimension == 1 or cell.dimension == 2:
        plain_ints = _gaussian_int(fused_cell)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        # sqrt of the weights, because both factors of the product below
        # carry it.
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt)
            if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints)
            aoaux = aoaux.T
            LkR = aoaux.real * coulG[p0:p1]
            LkI = aoaux.imag * coulG[p0:p1]
            aoaux = None

            # Only the rows from naux on are corrected; the upper-right block
            # is then restored from the lower-left one.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        '''Write the fitted tensors for all pairs with this kptj - kpti.'''
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        # Fourier transform of the shells appended after the auxcell shells.
        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite.  Fall back to an
            # eigen-decomposition and keep only the eigenvalues above the
            # linear-dependency threshold.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            '''Apply the Fourier-space correction to one AO-shell batch and
            write the fitted result.'''
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                            aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                            vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                                  wcoulG[p0:p1][G0idx], SI_on_z)
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:,None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                            tmp = aux = vG_mod

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # The imaginary part is only tracked when it was
                    # allocated by the caller (see the loop at the end of
                    # make_kpt).
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d'%(ji,istep)] = v

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    # Re-assemble the auxiliary segments written by _aux_e2
                    # for the AO-pair columns col0:col1.
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)

        # The raw integrals of this group are no longer needed.
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    feri = h5py.File(cderi_file, 'w')
    try:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
    finally:
        # Release the output file even if a k-point fails.
        feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index tensors and write them to
    ``cderi_file``.

    Steps, as implemented below:

    1. ``outcore._aux_e2`` writes the 3-center integrals over the fused
       auxiliary cell to a temporary HDF5 file (``'j3c-junk'``).
    2. For each unique k-point difference kptj - kpti, the 2-center metric
       ``j2c`` is corrected with Fourier-space contributions and stored in the
       temporary file.
    3. The metric is decomposed once per group of symmetry-related k-point
       differences and reused (or complex-conjugated) within the group;
       ``make_kpt`` then writes ``'j3c/<pair>/<step>'`` (and ``'j3c-/...'``
       when a negative-eigenvalue part exists) to ``cderi_file``.

    Args:
        mydf : the density-fitting object (provides ``mesh``, ``max_memory``,
            ``weighted_coulG``, ``auxbar``, ``linear_dep_threshold``,
            ``stdout`` and ``verbose``).
        cell, auxcell : the orbital-basis cell and the auxiliary-basis cell.
        kptij_lst : array of (kpti, kptj) pairs; also saved as ``'j3c-kptij'``.
        cderi_file : name of the output HDF5 file.  It is created / truncated,
            and closed even when the calculation raises.
    '''
    # time.clock() no longer exists on Python >= 3.8; use the logger's clocks,
    # which are the ones the logger timers are meant to be paired with.
    t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            # Only the rows from naux on are corrected; the upper-right block
            # is then restored from the lower-left one.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Return (j2c, j2c_negative, tag) for the stored metric.

        tag is 'CD' when the Cholesky factorisation succeeds and 'eig' when
        the eigen-decomposition fallback is used.  j2c_negative is None
        unless the fallback finds eigenvalues below -linear_dep_threshold in
        a 2D cell whose low_dim_ft_type is not 'inf_vacuum'.
        '''
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite.  Fall back to an
            # eigen-decomposition and keep only the eigenvalues above the
            # linear-dependency threshold.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Write the fitted tensors for all pairs with this kptj - kpti,
        using the given decomposed metric.'''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        # Fourier transform of the shells appended after the auxcell shells.
        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            '''Read the raw integrals for AO-pair columns col0:col1.'''
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        # Column offsets of each shell batch within the AO-pair axis.
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                       len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # The imaginary part exists only when load() allocated it.
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        # The raw integrals of this group are no longer needed.
        for ji in adapted_ji_idx:
            del fswap['j3c-junk/%d' % ji]

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor.  If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        '''Complex-conjugate a decomposed metric, keeping its tag.'''
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    feri = h5py.File(cderi_file, 'w')
    try:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])
        a = cell.lattice_vectors() / (2 * numpy.pi)

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-points k' with (k - k') * a = 2n pi: the decomposed
            # metric of k is reused for them unchanged.
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-points k' with (k + k') * a = 2n pi: the complex
            # conjugate of the decomposed metric of k is used for them.
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
    finally:
        # Release the output file even if a k-point fails.
        feri.close()
def ao2mo_7d(mydf, mo_coeff_kpts, kpts=None, factor=1, out=None):
    '''Fill ``out[ki, kj, kk]`` with MO-basis two-electron integrals built
    from the density-fitting tensors yielded by ``mydf.sr_loop``.

    For every pair of k-point indices (ki, kj) and every kk, the fourth
    k-point index kl is read from ``kpts_helper.get_kconserv``.

    Args:
        mydf : object providing ``cell``, ``kpts``, ``mesh``, ``max_memory``,
            ``blockdim`` and ``sr_loop``.
        mo_coeff_kpts : a single 3-dimensional ndarray (reused for all four
            orbital indices) or a sequence of four such arrays.  The first
            axis is indexed by k-point index and the third axis gives the
            number of orbitals.
        kpts : k-points to use; ``mydf.kpts`` when None.  It is fancy-indexed
            below, so it has to be an ndarray.
        factor : scalar applied to every contribution.
        out : optional preallocated array with shape
            (nkpts, nkpts, nkpts, nmoi, nmoj, nmok, nmol).

    Returns:
        ``out`` (newly allocated when not supplied).
    '''
    cell = mydf.cell
    kpts = mydf.kpts if kpts is None else kpts
    nkpts = len(kpts)

    single_array = (isinstance(mo_coeff_kpts, numpy.ndarray)
                    and mo_coeff_kpts.ndim == 3)
    mo_coeff_kpts = [mo_coeff_kpts] * 4 if single_array else list(mo_coeff_kpts)

    # Shape of the orbitals can be different on different k-points. The
    # orbital coefficients must be formatted (padded by zeros) so that the
    # shape of the orbital coefficients are the same on all k-points. This can
    # be achieved by calling pbc.mp.kmp2.padded_mo_coeff function
    nmoi, nmoj, nmok, nmol = [coeff.shape[2] for coeff in mo_coeff_kpts]
    eri_shape = (nkpts,) * 3 + (nmoi, nmoj, nmok, nmol)
    dtype = (numpy.result_type(*mo_coeff_kpts) if gamma_point(kpts)
             else numpy.complex128)

    if out is None:
        out = numpy.empty(eri_shape, dtype=dtype)
    else:
        assert out.shape == eri_shape

    # All ordered (ki, kj) pairs, flattened so that pair index = ki*nkpts + kj,
    # grouped by their difference kj - ki.
    kpt_pairs = numpy.array([(ki, kj) for ki in kpts for kj in kpts])
    first_kpts = kpt_pairs[:, 0]
    second_kpts = kpt_pairs[:, 1]
    uniq_kpts, uniq_index, uniq_inverse = unique(second_kpts - first_kpts)
    ngrids = numpy.prod(mydf.mesh)  # not used below
    nao = cell.nao_nr()
    mem_left = mydf.max_memory - lib.current_memory()[0] - nao**4*16/1e6
    max_memory = max(2000, mem_left) * .5

    tao = []
    ao_loc = None
    kconserv = kpts_helper.get_kconserv(cell, kpts)
    for uniq_id in range(len(uniq_kpts)):
        for pair_idx in numpy.where(uniq_inverse == uniq_id)[0]:
            ki, kj = divmod(pair_idx, nkpts)

            moij, ijslice = _conc_mos(mo_coeff_kpts[0][ki], mo_coeff_kpts[1][kj])[2:]
            # ij-side tensors in the MO basis, one entry per sr_loop block.
            zij = [_ao2mo.r_e2(LpqR+LpqI*1j, moij, ijslice, tao, ao_loc)
                   for LpqR, LpqI, sign in mydf.sr_loop(kpts[[ki,kj]], max_memory,
                                                        False, mydf.blockdim)]

            for kk in range(nkpts):
                kl = kconserv[ki, kj, kk]
                mokl, klslice = _conc_mos(mo_coeff_kpts[2][kk], mo_coeff_kpts[3][kl])[2:]
                eri_mo = numpy.zeros((nmoi*nmoj,nmok*nmol), dtype=numpy.complex128)
                kl_blocks = mydf.sr_loop(kpts[[kk,kl]], max_memory, False, mydf.blockdim)
                for blk, (LrsR, LrsI, sign) in enumerate(kl_blocks):
                    zkl = _ao2mo.r_e2(LrsR+LrsI*1j, mokl, klslice, tao, ao_loc)
                    lib.dot(zij[blk].T, zkl, sign*factor, eri_mo, 1)

                if dtype == numpy.double:
                    eri_mo = eri_mo.real
                out[ki,kj,kk] = eri_mo.reshape(eri_shape[3:])
    return out