def sr_loop(self, kpti_kptj=numpy.zeros((2, 3)), max_memory=2000, compact=True, blksize=None):
    '''Short range part.

    Generator over row blocks of the stored 3-index tensor for one pair of
    k-points.  Every item is a tuple ``(LpqR, LpqI, sign)``: the real part,
    the imaginary part, and ``1`` for blocks read from ``'j3c'`` or ``-1``
    for blocks read from ``'j3c-'``.

    Args:
        kpti_kptj : pair (kpti, kptj) of k-points.
        max_memory : memory budget in MB; only used to derive ``blksize``
            when ``blksize`` is None.
        compact : when False and kpti equals kptj, each block is expanded
            with ``lib.unpack_tril`` and reshaped to ``(-1, nao**2)``.
        blksize : number of tensor rows loaded per block.  When None it is
            derived from ``max_memory`` and capped by ``self.blockdim``.
    '''
    if self._cderi is None:
        self.build()

    cell = self.cell
    kpti, kptj = kpti_kptj
    unpack = is_zero(kpti - kptj) and not compact
    is_real = is_zero(kpti_kptj)
    nao = cell.nao_nr()

    if blksize is None:
        # 8 bytes per element for real data, 16 for complex data.
        bytes_per_element = 8 if is_real else 16
        # The trailing "/ 2" leaves room for the prefetched block.
        blksize = max_memory * 1e6 / bytes_per_element / (nao**2 * 2) / 2
        blksize = max(16, min(int(blksize), self.blockdim))
        logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory, blksize)

    def block_loader(dataset):
        '''Build the function that reads one row range of ``dataset``.'''
        def fetch(bounds):
            row0, row1 = bounds
            if is_real:
                real_part = numpy.asarray(dataset[row0:row1])
                if unpack:
                    real_part = lib.unpack_tril(real_part).reshape(-1, nao**2)
                return real_part, numpy.zeros_like(real_part)

            block = numpy.asarray(dataset[row0:row1])
            real_part = numpy.asarray(block.real, order='C')
            imag_part = numpy.asarray(block.imag, order='C')
            block = None  # drop the complex copy before unpacking
            if unpack:
                real_part = lib.unpack_tril(real_part).reshape(-1, nao**2)
                imag_part = lib.unpack_tril(imag_part, lib.ANTIHERMI).reshape(-1, nao**2)
            return real_part, imag_part
        return fetch

    with _load3c(self._cderi, 'j3c', kpti_kptj, 'j3c-kptij') as j3c:
        row_ranges = lib.prange(0, j3c.shape[0], blksize)
        for real_part, imag_part in lib.map_with_prefetch(block_loader(j3c), row_ranges):
            yield real_part, imag_part, 1
            # Release the block before the next one is handed out.
            real_part = imag_part = None

    if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
        # Truncated Coulomb operator is not positive definite.  Load the
        # CDERI tensor of the negative part.
        with _load3c(self._cderi, 'j3c-', kpti_kptj, 'j3c-kptij',
                     ignore_key_error=True) as j3c:
            row_ranges = lib.prange(0, j3c.shape[0], blksize)
            for real_part, imag_part in lib.map_with_prefetch(block_loader(j3c), row_ranges):
                yield real_part, imag_part, -1
                real_part = imag_part = None
def loop(self, blksize=None):
    '''Iterate over row blocks of the ``'j3c'`` tensor held in ``self._cderi``.

    Args:
        blksize : number of rows per block; ``self.blockdim`` when None.

    Yields:
        One array per block.  When the tensor is an in-memory
        ``numpy.ndarray`` the block is returned as a C-ordered array;
        otherwise blocks are read through ``lib.map_with_prefetch``.
    '''
    if self._cderi is None:
        self.build()
    step = self.blockdim if blksize is None else blksize

    with addons.load(self._cderi, 'j3c') as feri:
        if isinstance(feri, numpy.ndarray):
            # In-memory tensor: plain slicing, no prefetch.
            for start, stop in self.prange(0, feri.shape[0], step):
                yield numpy.asarray(feri[start:stop], order='C')
            return

        if isinstance(feri, h5py.Group):
            # starting from pyscf-1.7, DF tensor may be stored in
            # block format
            nrows = feri['0'].shape[0]

            def fetch(bounds):
                start, stop = bounds
                return _load_from_h5g(feri, start, stop)
        else:
            nrows = feri.shape[0]

            def fetch(bounds):
                start, stop = bounds
                return numpy.asarray(feri[start:stop])

        row_ranges = self.prange(0, nrows, step)
        for block in lib.map_with_prefetch(fetch, row_ranges):
            yield block
            # Release the block before the next one is handed out.
            block = None
def cholesky_eri(mol, erifile, auxbasis='weigend+etb', dataname='j3c', tmpdir=None,
                 int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
                 max_memory=MAX_MEMORY, auxmol=None, verbose=logger.NOTE):
    '''3-index density-fitting tensor.

    The tensor is first generated block-wise into a temporary HDF5 file by
    :func:`cholesky_eri_b` and then copied, in row slices, into a single
    dataset ``dataname`` of ``erifile``.

    Args:
        mol : molecule object providing ``nao_nr()``.
        erifile : destination passed to ``_create_h5file``.
        auxbasis : auxiliary basis, used when ``auxmol`` is None.
        dataname : name of the dataset written to ``erifile``.
        tmpdir : directory of the temporary swap file; ``lib.param.TMPDIR``
            when None.
        int3c, int2c : integral names forwarded to :func:`cholesky_eri_b`.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components; must be 1.
        max_memory : memory budget in MB used to size the row slices.
        auxmol : auxiliary molecule; built with ``make_auxmol`` when None.
        verbose : verbosity level or logger.

    Returns:
        ``erifile``, unchanged.
    '''
    assert(aosym in ('s1', 's2ij'))
    assert(comp == 1)
    log = logger.new_logger(mol, verbose)
    # time.clock() was removed in Python 3.8.  Prefer time.process_time()
    # and fall back to time.clock() only on interpreters that lack it.
    # NOTE(review): log.timer is defined elsewhere; confirm it measures CPU
    # time with a matching clock.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())

    if auxmol is None:
        auxmol = make_auxmol(mol, auxbasis)
    if tmpdir is None:
        tmpdir = lib.param.TMPDIR

    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    cholesky_eri_b(mol, swapfile.name, auxbasis, dataname,
                   int3c, aosym, int2c, comp, max_memory, auxmol, verbose=log)

    fswap = feri = None
    try:
        fswap = h5py.File(swapfile.name, 'r')
        time1 = log.timer('generate (ij|L) 1 pass', *time0)

        # Cannot let naoaux = auxmol.nao_nr() if auxbasis has linear dependence
        nao = mol.nao_nr()
        if aosym == 's1':
            nao_pair = nao * nao
        else:
            nao_pair = nao * (nao+1) // 2

        feri = _create_h5file(erifile, dataname)
        if comp == 1:
            naoaux = fswap['%s/0'%dataname].shape[0]
            h5d_eri = feri.create_dataset(dataname, (naoaux,nao_pair), 'f8')
        else:
            naoaux = fswap['%s/0'%dataname].shape[1]
            h5d_eri = feri.create_dataset(dataname, (comp,naoaux,nao_pair), 'f8')

        # Number of auxiliary rows copied per step (at least 28, at most all).
        iolen = min(max(int(max_memory*.45e6/8/nao_pair), 28), naoaux)
        totstep = (naoaux+iolen-1)//iolen

        def load(row_slice):
            row0, row1 = row_slice
            return _load_from_h5g(fswap[dataname], row0, row1)

        ti0 = time1
        slices = list(lib.prange(0, naoaux, iolen))
        for istep, dat in enumerate(lib.map_with_prefetch(load, slices)):
            row0, row1 = slices[istep]
            nrow = row1 - row0
            if comp == 1:
                h5d_eri[row0:row1] = dat
            else:
                h5d_eri[:,row0:row1] = dat
            dat = None  # release the block before the next one arrives
            ti0 = log.timer('step 2 [%d/%d], [%d:%d], row = %d'%
                            (istep+1, totstep, row0, row1, nrow), *ti0)
    finally:
        # Close the HDF5 handles even when the copy fails part-way.
        for handle in (fswap, feri):
            if handle is not None:
                handle.close()

    log.timer('cholesky_eri', *time0)
    return erifile
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Build and store the j3c blocks of every k-point pair mapped to one
    unique k-point difference.

    Relies on names from the enclosing scope (``uniq_kpts``, ``uniq_inverse``,
    ``kptjs``, ``fswap``, ``feri``, ``fuse``, ``mydf``, ``cell``, ``auxcell``,
    ``fused_cell``, ``Gv``, ``Gvbase``, ``gxyz``, ``b``, ``mesh``, ``ngrids``,
    ``nao``, ``naux``, ``nsegs``, ``log``).

    Args:
        uniq_kptji_id : index into ``uniq_kpts``.
        cholesky_j2c : tuple ``(j2c, j2c_negative, j2ctag)``.  With tag
            ``'CD'`` ``j2c`` is used in a lower-triangular solve; otherwise
            it is applied with ``lib.dot``.  ``j2c_negative`` may be None.

    Side effects:
        Writes ``feri['j3c/<ji>/<istep>']`` (and ``feri['j3c-/<ji>/<istep>']``
        when ``j2c_negative`` is not None) and finally deletes
        ``fswap['j3c-junk/<ji>']`` for every adapted pair index ``ji``.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    log.debug1('kpt = %s', kpt)
    # Indices of the k-point pairs whose difference maps to this unique kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # Fourier transform of the shells auxcell.nbas..fused_cell.nbas of
    # fused_cell, scaled by the weighted Coulomb kernel and split into
    # C-ordered real and imaginary parts.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        if cell.dimension == 3:
            # vbar and ovlp are only defined (and only used, in load below)
            # for zero kpt in 3D.
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Largest third entry over all shell ranges (used as ncol in the loop below).
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)

    def load(aux_slice):
        # Read columns col0:col1 of the 'j3c-junk' segments for every adapted
        # pair and return the lists (j3cR, j3cI) of real / imaginary parts.
        # j3cI[k] is None when kpt is zero and adapted_kptjs[k] is a gamma point.
        col0, col1 = aux_slice
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.vstack([
                fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                for i in range(nsegs)
            ])
            # vbar is the interaction between the background charge
            # and the auxiliary basis. 0D, 1D, 2D do not have vbar.
            if is_zero(kpt) and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
        return j3cR, j3cI

    # Scratch buffers reused for every (shell range, G block) pair.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

    # Column offsets of each shell range: consecutive (start, stop) pairs.
    cols = [sh_range[2] for sh_range in shranges]
    locs = numpy.append(0, numpy.cumsum(cols))
    tasks = zip(locs[:-1], locs[1:])
    for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
        bstart, bend, ncol = shranges[istep]
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                   len(shranges), bstart, bend, ncol)
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                                        gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # Accumulate in place into rows naux: of the loaded blocks.
                # NOTE(review): presumably lib.dot(a, b, alpha, c, beta)
                # computes c = alpha*a.dot(b) + beta*c -- confirm against lib.dot.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d' % (ji, istep)] = v
            else:
                feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
        j3cR = j3cI = None

    # The intermediate data of these pairs is no longer needed.
    for ji in adapted_ji_idx:
        del (fswap['j3c-junk/%d' % ji])
def cholesky_eri_b(mol, erifile, auxbasis='weigend+etb', dataname='j3c',
                   int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
                   max_memory=MAX_MEMORY, auxmol=None, verbose=logger.NOTE):
    '''3-center 2-electron DF tensor.

    Similar to cholesky_eri while this function stores DF tensor in blocks:
    one dataset ``'<dataname>/<istep>'`` per AO shell range.

    The 2-center integrals are Cholesky decomposed; if that fails, an
    eigen-decomposition is used instead, keeping only eigenvalues larger
    than ``LINEAR_DEP_THR``.

    Args:
        mol : molecule object.
        erifile : destination passed to ``_create_h5file``.
        auxbasis : auxiliary basis, used when ``auxmol`` is None.
        dataname : name of the group that holds the blocks.
        int3c, int2c : names of the 3-center and 2-center integrals.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components.
        max_memory : memory budget in MB used to size the shell ranges.
        auxmol : auxiliary molecule; built with ``make_auxmol`` when None.
        verbose : verbosity level or logger.

    Returns:
        ``erifile``, unchanged.
    '''
    assert(aosym in ('s1', 's2ij'))
    log = logger.new_logger(mol, verbose)
    # time.clock() was removed in Python 3.8.  Prefer time.process_time()
    # and fall back to time.clock() only on interpreters that lack it.
    # NOTE(review): log.timer is defined elsewhere; confirm it measures CPU
    # time with a matching clock.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())

    if auxmol is None:
        auxmol = make_auxmol(mol, auxbasis)
    j2c = auxmol.intor(int2c, hermi=1)
    log.debug('size of aux basis %d', j2c.shape[0])
    time1 = log.timer('2c2e', *time0)
    try:
        low = scipy.linalg.cholesky(j2c, lower=True)
        tag = 'cd'
    except scipy.linalg.LinAlgError:
        # Fall back to an eigen-decomposition with the small eigenvalues
        # discarded.
        w, v = scipy.linalg.eigh(j2c)
        idx = w > LINEAR_DEP_THR
        low = (v[:,idx] / numpy.sqrt(w[idx]))
        v = None
        tag = 'eig'
    j2c = None
    time1 = log.timer('Cholesky 2c2e', *time1)

    int3c = gto.moleintor.ascint3(mol._add_suffix(int3c))
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      auxmol._atm, auxmol._bas, auxmol._env)
    ao_loc = gto.moleintor.make_loc(bas, int3c)
    nao = ao_loc[mol.nbas]
    # Number of auxiliary functions, taken from the concatenated ao_loc.
    naoaux = ao_loc[-1] - nao
    if aosym == 's1':
        nao_pair = nao * nao
        buflen = min(max(int(max_memory*.24e6/8/naoaux/comp), 1), nao_pair)
        shranges = _guess_shell_ranges(mol, buflen, 's1')
    else:
        nao_pair = nao * (nao+1) // 2
        buflen = min(max(int(max_memory*.24e6/8/naoaux/comp), 1), nao_pair)
        shranges = _guess_shell_ranges(mol, buflen, 's2ij')
    log.debug('erifile %.8g MB, IO buf size %.8g MB',
              naoaux*nao_pair*8/1e6, comp*buflen*naoaux*8/1e6)
    log.debug1('shranges = %s', shranges)
    # TODO: Libcint-3.14 and newer version support to compute int3c2e without
    # the opt for the 3rd index.
    #if '3c2e' in int3c:
    #    cintopt = gto.moleintor.make_cintopt(atm, mol._bas, env, int3c)
    #else:
    #    cintopt = gto.moleintor.make_cintopt(atm, bas, env, int3c)
    cintopt = gto.moleintor.make_cintopt(atm, bas, env, int3c)
    # Output buffer handed to getints3c for every shell range.
    bufs1 = numpy.empty((comp*max([x[2] for x in shranges]),naoaux))

    def transform(b):
        # Bring the integrals to shape (naoaux, -1), then apply the
        # decomposed 2-center metric.
        if b.ndim == 3 and b.flags.f_contiguous:
            b = lib.transpose(b.T, axes=(0,2,1)).reshape(naoaux,-1)
        else:
            b = b.reshape((-1,naoaux)).T
        if tag == 'cd':
            if b.flags.c_contiguous:
                b = lib.transpose(b).T
            return scipy.linalg.solve_triangular(low, b, lower=True,
                                                 overwrite_b=True,
                                                 check_finite=False)
        else:
            return lib.dot(low.T, b)

    def process(sh_range):
        # Compute and transform the integrals of one AO shell range.
        bstart, bend, nrow = sh_range
        shls_slice = (bstart, bend, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas)
        ints = gto.moleintor.getints3c(int3c, atm, bas, env, shls_slice, comp,
                                       aosym, ao_loc, cintopt, out=bufs1)
        if comp == 1:
            dat = transform(ints)
        else:
            dat = [transform(x) for x in ints]
        return dat

    feri = _create_h5file(erifile, dataname)
    try:
        for istep, dat in enumerate(lib.map_with_prefetch(process, shranges)):
            sh_range = shranges[istep]
            label = '%s/%d'%(dataname,istep)
            if comp == 1:
                feri[label] = dat
            else:
                shape = (len(dat),) + dat[0].shape
                fdat = feri.create_dataset(label, shape, dat[0].dtype.char)
                for i, b in enumerate(dat):
                    fdat[i] = b
            dat = None  # release the block before the next one arrives
            log.debug('int3c2e [%d/%d], AO [%d:%d], nrow = %d',
                      istep+1, len(shranges), *sh_range)
            time1 = log.timer('gen CD eri [%d/%d]' % (istep+1,len(shranges)), *time1)
    finally:
        # Close the HDF5 handle even when a block fails to be generated.
        feri.close()
    bufs1 = None
    return erifile
def general(mol, mo_coeffs, erifile, auxbasis='weigend+etb', dataname='eri_mo', tmpdir=None,
            int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
            max_memory=MAX_MEMORY, verbose=0, compact=True):
    ''' Transform ij of (ij|L) to MOs.

    The AO tensor is first generated block-wise into a temporary HDF5 file
    by :func:`cholesky_eri_b`; each row slice is then transformed with
    ``_ao2mo.nr_e2`` and written to the dataset ``dataname`` of ``erifile``.

    Args:
        mol : molecule object.
        mo_coeffs : pair of MO coefficient matrices; ``mo_coeffs[0].shape[0]``
            is taken as the number of AOs.
        erifile : destination passed to ``_create_h5file``.
        auxbasis : auxiliary basis forwarded to :func:`cholesky_eri_b`.
        dataname : name of the dataset written to ``erifile``.
        tmpdir : directory of the temporary swap file; ``lib.param.TMPDIR``
            when None.
        int3c, int2c : integral names forwarded to :func:`cholesky_eri_b`.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components.
        max_memory : memory budget in MB used to size the row slices.
        verbose : verbosity level or logger.
        compact : forwarded (combined with ``aosym != 's1'``) to
            ``ao2mo.incore._conc_mos``.

    Returns:
        ``erifile``, unchanged.
    '''
    assert(aosym in ('s1', 's2ij'))
    # time.clock() was removed in Python 3.8.  Prefer time.process_time()
    # and fall back to time.clock() only on interpreters that lack it.
    # NOTE(review): log.timer is defined elsewhere; confirm it measures CPU
    # time with a matching clock.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mol, verbose)

    if tmpdir is None:
        tmpdir = lib.param.TMPDIR
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    cholesky_eri_b(mol, swapfile.name, auxbasis, dataname,
                   int3c, aosym, int2c, comp, max_memory, verbose=log)

    fswap = feri = None
    try:
        fswap = h5py.File(swapfile.name, 'r')
        time1 = log.timer('AO->MO eri transformation 1 pass', *time0)

        nao = mo_coeffs[0].shape[0]
        if aosym == 's1':
            nao_pair = nao * nao
            aosym_as_nr_e2 = 's1'
        else:
            nao_pair = nao * (nao+1) // 2
            aosym_as_nr_e2 = 's2kl'

        ijmosym, nij_pair, moij, ijshape = \
                ao2mo.incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                                       compact and aosym != 's1')

        naoaux = fswap['%s/0'%dataname].shape[-2]
        feri = _create_h5file(erifile, dataname)
        if comp == 1:
            h5d_eri = feri.create_dataset(dataname, (naoaux,nij_pair), 'f8')
        else:
            h5d_eri = feri.create_dataset(dataname, (comp,naoaux,nij_pair), 'f8')

        def load(row_slice):
            row0, row1 = row_slice
            return _load_from_h5g(fswap[dataname], row0, row1)

        # Number of auxiliary rows transformed per step (at least 28, at
        # most all of them).
        iolen = min(max(int(max_memory*.45e6/8/(nao_pair+nij_pair)), 28), naoaux)
        totstep = (naoaux+iolen-1)//iolen
        ti0 = time1
        slices = list(lib.prange(0, naoaux, iolen))
        for istep, dat in enumerate(lib.map_with_prefetch(load, slices)):
            row0, row1 = slices[istep]
            nrow = row1 - row0
            if comp == 1:
                dat = _ao2mo.nr_e2(dat, moij, ijshape, aosym_as_nr_e2, ijmosym)
                h5d_eri[row0:row1] = dat
            else:
                dat = _ao2mo.nr_e2(dat.reshape(comp*nrow, nao_pair),
                                   moij, ijshape, aosym_as_nr_e2, ijmosym)
                h5d_eri[:,row0:row1] = dat.reshape(comp, nrow, nij_pair)
            dat = None  # release the block before the next one arrives
            log.debug('step 2 [%d/%d], [%d:%d], row = %d',
                      istep+1, totstep, row0, row1, nrow)
            ti0 = log.timer('step 2 [%d/%d], [%d:%d], row = %d'%
                            (istep+1, totstep, row0, row1, nrow), *ti0)
    finally:
        # Close the HDF5 handles even when the transformation fails part-way.
        for handle in (fswap, feri):
            if handle is not None:
                handle.close()

    log.timer('AO->MO CD eri transformation 2 pass', *time1)
    log.timer('AO->MO CD eri transformation', *time0)
    return erifile
def _aux_e2(cell, auxcell, erifile, intor='int3c2e', aosym='s2ij', comp=None,
            kptij_lst=None, dataname='eri_mo', shls_slice=None,
            max_memory=2000, verbose=0):
    r'''3-center AO integrals (ij|L) with double lattice sum:
    \sum_{lm} (i[l]j[m]|L[0]), where L is the auxiliary basis.

    The integrals are stored on disk as one dataset per k-point pair and
    per auxiliary segment, ``'<dataname>/<k>/<istep>'``, each holding the
    slice ``mat[k]`` of an array of shape (nkptij, comp, nao_pair, nrow).
    The list of k-point pairs is stored under ``'<dataname>-kptij'``.

    **This function should be only used by df and mdf initialization
    function _make_j3c**

    Args:
        cell, auxcell : cells providing the AO and the auxiliary basis.
        erifile : an ``h5py.Group`` to write into, or a file name.  A file
            name is opened here (append mode if it already is an HDF5 file,
            otherwise created) and closed before returning, also on error.
            A group passed in by the caller is left open.
        intor : name of the 3-center integral.
        aosym : AO-pair symmetry; only its first two characters are tested
            against ``'s2'``.
        comp : number of integral components.
        kptij_lst : (*,2,3) array
            A list of (kpti, kptj); a single zero pair when None.
        dataname : name of the group that receives the integrals.
        shls_slice : 6-tuple of shell bounds; all shells when None.
        max_memory : memory budget in MB used to size the auxiliary segments.
        verbose : not used in this function.

    Returns:
        ``erifile``, unchanged.
    '''
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor), comp)

    # Only a file opened here is closed at the end.
    owns_file = not isinstance(erifile, h5py.Group)
    if not owns_file:
        feri = erifile
    elif h5py.is_hdf5(erifile):
        feri = h5py.File(erifile, 'a')
    else:
        feri = h5py.File(erifile, 'w')

    try:
        # Remove results of a previous run stored under the same names.
        if dataname in feri:
            del (feri[dataname])
        if dataname + '-kptij' in feri:
            del (feri[dataname + '-kptij'])

        if kptij_lst is None:
            kptij_lst = numpy.zeros((1, 2, 3))
        feri[dataname + '-kptij'] = kptij_lst

        if shls_slice is None:
            shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)

        ao_loc = cell.ao_loc_nr()
        aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor)[:shls_slice[5] + 1]
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nkptij = len(kptij_lst)

        # Number of packed (lower-triangular) pairs and of all pairs.
        nii = (ao_loc[shls_slice[1]] * (ao_loc[shls_slice[1]] + 1) // 2 -
               ao_loc[shls_slice[0]] * (ao_loc[shls_slice[0]] + 1) // 2)
        nij = ni * nj

        kpti = kptij_lst[:, 0]
        kptj = kptij_lst[:, 1]
        # Pairs with kpti == kptj (within KPT_DIFF_TOL).
        aosym_ks2 = abs(kpti - kptj).sum(axis=1) < KPT_DIFF_TOL
        j_only = numpy.all(aosym_ks2)
        #aosym_ks2 &= (aosym[:2] == 's2' and shls_slice[:2] == shls_slice[2:4])
        aosym_ks2 &= aosym[:2] == 's2'

        if j_only and aosym[:2] == 's2':
            assert (shls_slice[2] == 0)
            nao_pair = nii
        else:
            nao_pair = nij

        if gamma_point(kptij_lst):
            dtype = numpy.double
        else:
            dtype = numpy.complex128

        buflen = max(8, int(max_memory * .47e6 / 16 / (nkptij * ni * nj * comp)))
        auxdims = (aux_loc[shls_slice[4] + 1:shls_slice[5] + 1] -
                   aux_loc[shls_slice[4]:shls_slice[5]])
        auxranges = balance_segs(auxdims, buflen)
        buflen = max([x[2] for x in auxranges])
        # Two buffers, swapped in process(), so that a prefetched segment
        # does not overwrite the one still being written to disk.
        buf = numpy.empty(nkptij * comp * ni * nj * buflen, dtype=dtype)
        bufs = [buf, numpy.empty_like(buf)]
        int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst)

        def process(aux_range):
            sh0, sh1, nrow = aux_range
            sub_slice = (shls_slice[0], shls_slice[1], shls_slice[2],
                         shls_slice[3], shls_slice[4] + sh0, shls_slice[4] + sh1)
            mat = numpy.ndarray((nkptij, comp, nao_pair, nrow), dtype=dtype,
                                buffer=bufs[0])
            bufs[:] = bufs[1], bufs[0]
            int3c(sub_slice, mat)
            return mat

        kpt_ji = kptj - kpti
        uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
        # sorted_ij_idx: Sort and group the kptij_lst according to the ordering in
        # df._make_j3c to reduce the data fragment in the hdf5 file. When datasets
        # are written to hdf5, they are saved sequentially. If the integral data are
        # saved as the order of kptij_lst, removing the datasets in df._make_j3c will
        # lead to disk space fragment that can not be reused.
        sorted_ij_idx = numpy.hstack(
            [numpy.where(uniq_inverse == k)[0] for k, kpt in enumerate(uniq_kpts)])

        # Flat indices of the lower triangle of an (ni, ni) matrix.
        tril_idx = numpy.tril_indices(ni)
        tril_idx = tril_idx[0] * ni + tril_idx[1]

        for istep, mat in enumerate(lib.map_with_prefetch(process, auxranges)):
            for k in sorted_ij_idx:
                v = mat[k]
                if gamma_point(kptij_lst[k]):
                    v = v.real
                if aosym_ks2[k] and nao_pair == ni**2:
                    # Full square storage: keep the lower triangle only.
                    v = v[:, tril_idx]
                feri['%s/%d/%d' % (dataname, k, istep)] = v
            mat = None
    finally:
        # Close the file opened above even when the integral run fails.
        if owns_file:
            feri.close()
    return erifile