def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the density-fitting 3-index tensor and store it in ``cderi_file``.

    Stages, as implemented below:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals over ``cell`` and the
       fused auxiliary cell into a temporary HDF5 swap file ('j3c-junk').
    2. For every unique ``kptj - kpti`` the 'int2c2e' metric ``j2c`` is
       evaluated, a plane-wave term built from ``ft_ao.ft_ao`` and
       ``mydf.weighted_coulG`` is subtracted, and the fused result is stored
       in the swap file as 'j2c/<k>'.
    3. Each metric is Cholesky decomposed; when that raises ``LinAlgError``
       an eigen-decomposition filtered by ``mydf.linear_dep_threshold`` is
       used instead.
    4. For every unique k-point difference the 3-index integrals are read
       back, corrected with the plane-wave term, contracted with the
       decomposed metric and written to ``cderi_file`` as
       'j3c/<ji>/<istep>' (plus 'j3c-/<ji>/<istep>' when a negative metric
       part exists).

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``linear_dep_threshold``, ``weighted_coulG`` and ``auxbar`` are
            read from it.
        cell: Orbital-basis cell (presumably a PySCF ``Cell`` - confirm).
        auxcell: Auxiliary-basis cell.
        kptij_lst: Array of k-point pairs; ``[:, 0]`` is used as kpti and
            ``[:, 1]`` as kptj.
        cderi_file: Path of the HDF5 file to create (opened with mode 'w').

    Returns:
        None. Results are written to ``cderi_file``.
    """
    # time.clock() was removed in Python 3.8. Keep using it where it still
    # exists (unchanged behaviour) and fall back to time.process_time(), its
    # documented replacement, elsewhere.
    # NOTE(review): confirm this matches the clocks used inside log.timer_*.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        # Accumulate the plane-wave correction block by block over the grid
        # so that only `blksize` grid points are held in memory at a time.
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b,
                                gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1],
                                    LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        """Return (j2c, j2c_negative, j2ctag) for the stored metric.

        j2ctag is 'CD' when the Cholesky factorisation succeeds and 'eig'
        when the eigen-decomposition fallback is used.
        """
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # A hard error ("J-metric not positive definite. It is likely
            # that mesh is not enough.") used to be considered here; it is
            # disabled in favour of the eigen-decomposition fallback below.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    # NOTE: `feri` and `nsegs`, used by make_kpt below, are bound in the
    # `with` block at the end of this function, before make_kpt is called.
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        """Write all j3c blocks whose kptj - kpti equals the given unique kpt."""
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1,
                                      kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            """Read the AO-pair columns [col0:col1] of j3c from the swap file."""
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep + 1, len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2 * numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # The context manager guarantees cderi_file is closed even when one of
    # the steps below raises (the original only closed it on success).
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the 3-index DF tensor (fused auxiliary basis) into ``cderi_file``.

    Stages, as implemented below:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals over ``cell`` and the
       fused auxiliary cell into a temporary HDF5 swap file ('j3c-junk').
    2. For every unique ``kptj - kpti`` the 'int2c2e' metric is fused, a
       plane-wave term built from ``ft_ao.ft_ao`` and ``mydf.weighted_coulG``
       is subtracted, and the result is stored as 'j2c/<k>' in the swap file.
    3. Each metric is eigen-decomposed and filtered with
       ``mydf.linear_dep_threshold`` (the Cholesky path is disabled here).
    4. For every unique k-point difference the 3-index integrals are read
       back, fused, corrected, contracted with the decomposed metric and
       written to ``cderi_file`` as 'j3c/<ji>/<istep>' (plus
       'j3c-/<ji>/<istep>' when a negative metric part exists).

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``linear_dep_threshold``, ``weighted_coulG`` and ``auxbar`` are
            read from it.
        cell: Orbital-basis cell (presumably a PySCF ``Cell`` - confirm).
        auxcell: Auxiliary-basis cell.
        kptij_lst: Array of k-point pairs; ``[:,0]`` is used as kpti and
            ``[:,1]`` as kptj.
        cderi_file: Path of the HDF5 file to create (opened with mode 'w').

    Returns:
        None. Results are written to ``cderi_file``.
    """
    # time.clock() was removed in Python 3.8. Keep using it where it still
    # exists (unchanged behaviour) and fall back to time.process_time(), its
    # documented replacement, elsewhere.
    # NOTE(review): confirm this matches the clocks used inside log.timer_*.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # Create swap file to avoid huge cderi_file. see also function
    # pyscf.pbc.df.df._make_j3c
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        LkR = numpy.asarray(aoaux.real, order='C')
        LkI = numpy.asarray(aoaux.imag, order='C')

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(LkR*coulG, LkR.T)
            j2c_k -= lib.dot(LkI*coulG, LkI.T)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(LkR*coulG, LkI*coulG, LkR.T, LkI.T)
            j2c_k -= j2cR + j2cI * 1j
        fswap['j2c/%d'%k] = j2c_k
        aoaux = LkR = LkI = j2cR = j2cI = coulG = None
    j2c = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        """Return (j2c, j2c_negative, j2ctag) from an eigen-decomposition.

        Despite the name, no Cholesky factorisation is attempted in this
        variant; j2ctag is always 'eig'.
        """
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        # Note large difference may be found in results between the CD/eig
        # treatments.  In some systems, small integral errors can lead to
        # different treatments of linear dependency which can be observed in
        # the total energy/orbital energy around 4th decimal place.
        #     try:
        #         j2c = scipy.linalg.cholesky(j2c, lower=True)
        #         j2ctag = 'CD'
        #     except scipy.linalg.LinAlgError as e:
        #
        # Abandon CD treatment for better numerical stability
        w, v = scipy.linalg.eigh(j2c)
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  uniq_kptji_id, w[-1]/w[0],
                  numpy.count_nonzero(w<mydf.linear_dep_threshold))
        v1 = v[:,w>mydf.linear_dep_threshold].T.conj()
        v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
        j2c = v1
        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
            idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
            if len(idx) > 0:
                j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
        j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    # NOTE: `feri` and `nsegs`, used by make_kpt below, are bound in the
    # `with` block at the end of this function, before make_kpt is called.
    def make_kpt(uniq_kptji_id, cholesky_j2c):  # kpt = kptj - kpti
        """Write all j3c blocks whose kptj - kpti equals the given unique kpt."""
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            """Subtract the plane-wave term and write one AO-pair segment."""
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                ncol = sh_range[2]
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                         for i in range(nsegs)]
                    v = fuse(numpy.vstack(v))
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # The context manager guarantees cderi_file is closed even when one of
    # the steps below raises (the original only closed it on success).
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the density-fitting 3-index tensor and store it in ``cderi_file``.

    Stages, as implemented below:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals over ``cell`` and the
       fused auxiliary cell into a temporary HDF5 swap file ('j3c-junk').
    2. For every unique ``kptj - kpti`` the 'int2c2e' metric ``j2c`` is
       evaluated, a plane-wave term built from ``ft_ao.ft_ao`` and
       ``mydf.weighted_coulG`` is subtracted, and the fused result is stored
       in the swap file as 'j2c/<k>'.
    3. Each metric is Cholesky decomposed; when that raises ``LinAlgError``
       an eigen-decomposition filtered by ``mydf.linear_dep_threshold`` is
       used instead.
    4. For every unique k-point difference the 3-index integrals are read
       back, corrected in a background worker, contracted with the
       decomposed metric and written to ``cderi_file`` as
       'j3c/<ji>/<istep>' (plus 'j3c-/<ji>/<istep>' when a negative metric
       part exists).

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``linear_dep_threshold``, ``weighted_coulG`` and ``auxbar`` are
            read from it.
        cell: Orbital-basis cell (presumably a PySCF ``Cell`` - confirm).
        auxcell: Auxiliary-basis cell.
        kptij_lst: Array of k-point pairs; ``[:,0]`` is used as kpti and
            ``[:,1]`` as kptj.
        cderi_file: Path of the HDF5 file to create (opened with mode 'w').

    Returns:
        None. Results are written to ``cderi_file``.
    """
    # time.clock() was removed in Python 3.8. Keep using it where it still
    # exists (unchanged behaviour) and fall back to time.process_time(), its
    # documented replacement, elsewhere.
    # NOTE(review): confirm this matches the clocks used inside log.timer_*.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        # Accumulate the plane-wave correction block by block over the grid
        # so that only `blksize` grid points are held in memory at a time.
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b,
                                gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        """Return (j2c, j2c_negative, j2ctag) for the stored metric.

        j2ctag is 'CD' when the Cholesky factorisation succeeds and 'eig'
        when the eigen-decomposition fallback is used.
        """
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # A hard error ("J-metric not positive definite. It is likely
            # that mesh is not enough.") used to be considered here; it is
            # disabled in favour of the eigen-decomposition fallback below.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    # NOTE: `feri` and `nsegs`, used by make_kpt below, are bound in the
    # `with` block at the end of this function, before make_kpt is called.
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        """Write all j3c blocks whose kptj - kpti equals the given unique kpt."""
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            """Subtract the plane-wave term and write one AO-pair segment."""
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                ncol = sh_range[2]
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # The context manager guarantees cderi_file is closed even when one of
    # the steps below raises (the original only closed it on success).
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the density-fitting 3-index tensor and store it in ``cderi_file``.

    Stages, as implemented below:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals over ``cell`` and the
       fused auxiliary cell into a temporary HDF5 swap file ('j3c-junk').
    2. For every unique ``kptj - kpti`` the 'int2c2e' metric ``j2c`` is
       evaluated, a plane-wave term built from ``ft_ao.ft_ao`` and
       ``sqrt(mydf.weighted_coulG)`` is subtracted (with an extra model-charge
       adjustment for ``cell.dimension`` 1 or 2 at zero k-point difference),
       and the fused result is stored in the swap file as 'j2c/<k>'.
    3. For every unique k-point difference the metric is Cholesky decomposed
       (eigen-decomposition on ``LinAlgError``), the 3-index integrals are
       read back, corrected in a background worker, contracted with the
       decomposed metric and written to ``cderi_file`` as 'j3c/<ji>/<istep>'.

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``linear_dep_threshold``, ``weighted_coulG`` and ``auxbar`` are
            read from it.
        cell: Orbital-basis cell (presumably a PySCF ``Cell`` - confirm).
        auxcell: Auxiliary-basis cell.
        kptij_lst: Array of k-point pairs; ``[:,0]`` is used as kpti and
            ``[:,1]`` as kptj.
        cderi_file: Path of the HDF5 file to create (opened with mode 'w').

    Returns:
        None. Results are written to ``cderi_file``.
    """
    # time.clock() was removed in Python 3.8. Keep using it where it still
    # exists (unchanged behaviour) and fall back to time.process_time(), its
    # documented replacement, elsewhere.
    # NOTE(review): confirm this matches the clocks used inside log.timer_*.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    # An alternative method to evaluate j2c. This method might have larger
    # numerical error?
    #    chgcell = make_modchg_basis(auxcell, mydf.eta)
    #    for k, kpt in enumerate(uniq_kpts):
    #        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
    #        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
    #        LkR = aoaux.real * coulG
    #        LkI = aoaux.imag * coulG
    #        j2caux = numpy.zeros_like(j2c[k])
    #        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
    #        if is_zero(kpt):  # kpti == kptj
    #            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
    #            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #            vbar = fuse(mydf.auxbar(fused_cell))
    #            s = (vbar != 0).astype(numpy.double)
    #            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
    #            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
    #        else:
    #            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
    #            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    #        aoaux = LkR = LkI = coulG = None

    if cell.dimension == 1 or cell.dimension == 2:
        plain_ints = _gaussian_int(fused_cell)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        # Accumulate the plane-wave correction block by block over the grid
        # so that only `blksize` grid points are held in memory at a time.
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b,
                                gxyz[p0:p1], Gvbase, kpt)
            if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints)
            aoaux = aoaux.T
            LkR = aoaux.real * coulG[p0:p1]
            LkI = aoaux.imag * coulG[p0:p1]
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    # NOTE: `feri` and `nsegs`, used by make_kpt below, are bound in the
    # `with` block at the end of this function, before make_kpt is called.
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        """Write all j3c blocks whose kptj - kpti equals the given unique kpt."""
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # A hard error ("J-metric not positive definite. It is likely
            # that mesh is not enough.") used to be considered here; it is
            # disabled in favour of the eigen-decomposition fallback below.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            """Subtract the plane-wave term and write one AO-pair segment."""
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        # NOTE(review): the nesting of this loop body was
                        # reconstructed from the use of `k`; `ovlp[k]` is
                        # not sliced to the current AO-pair segment here -
                        # confirm that is intended.
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                            aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                            vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                                  wcoulG[p0:p1][G0idx], SI_on_z)
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:,None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                            # NOTE(review): possibly meant "= None" to free
                            # the temporaries; kept as in the original.
                            tmp = aux = vG_mod

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d'%(ji,istep)] = v

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                ncol = sh_range[2]
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # The context manager guarantees cderi_file is closed even when one of
    # the steps below raises (the original only closed it on success).
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])

        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the 3-index DF integrals and store them in ``cderi_file``.

    The work is done in two passes.  First ``outcore._aux_e2`` is asked to
    dump the 'int3c2e' integrals between ``cell`` and the fused auxiliary
    cell into ``cderi_file`` under the name 'j3c-junk'.  Then, for every
    unique difference ``kptj - kpti``, the intermediates are read back,
    corrected with the ``ft_ao`` contributions, contracted with the
    eigen-decomposed 2-center metric, and written to
    ``'j3c/<pair index>/<step>'``.  The 'j3c-junk' group is removed at the end.

    Args:
        mydf: DF object.  This function reads ``stdout``, ``verbose``,
            ``max_memory``, ``mesh`` and ``linear_dep_threshold`` from it and
            calls its ``weighted_coulG`` and ``auxbar`` methods.
        cell: Cell providing the orbital basis.
        auxcell: Cell providing the auxiliary basis; combined with ``mydf``
            by ``fuse_auxcell``.
        kptij_lst: Array whose columns ``[:, 0]`` and ``[:, 1]`` hold the
            (kpti, kptj) k-point pairs.
        cderi_file: Name of the HDF5 file that receives the integrals.

    Returns:
        None.  All results are written to ``cderi_file``.
    """
    # time.clock() was removed in Python 3.8.  Fall back to process_time()
    # only when clock() is missing, so older interpreters behave as before.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore._aux_e2(cell, fused_cell, cderi_file, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    # The corrected 2-center metrics are parked in a temporary HDF5 file and
    # read back one unique k-point at a time inside make_kpt.
    fswap = lib.H5TmpFile()
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        # sqrt so that the product kL^T kL below carries one full coulG weight
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)

        j2c_k = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c_k -= lib.dot(kLR.T, kLR)
            j2c_k -= lib.dot(kLI.T, kLI)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c_k -= j2cR + j2cI * 1j
        fswap['j2c/%d' % k] = j2c_k
        # Drop the references so the large temporaries can be freed.
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None
    j2c = None

    # Open explicitly in append mode: the file already holds the 'j3c-junk'
    # data and must stay writable.  h5py >= 3.0 opens read-only by default.
    # The context manager guarantees the handle is closed on errors as well.
    with h5py.File(cderi_file, 'a') as feri:
        nsegs = len(feri['j3c-junk/0'])

        def make_kpt(uniq_kptji_id):
            """Process every k-point pair whose kptj - kpti is this kpt."""
            kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
            log.debug1('kpt = %s', kpt)
            adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
            adapted_kptjs = kptjs[adapted_ji_idx]
            nkptj = len(adapted_kptjs)
            log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

            Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
            Gaux = fuse(Gaux)
            Gaux *= mydf.weighted_coulG(kpt, False, mesh)
            kLR = Gaux.T.real.copy('C')
            kLI = Gaux.T.imag.copy('C')
            j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])

            # Note large difference may be found in results between the CD/eig
            # treatments.  In some systems, small integral errors can lead to
            # different treatments of linear dependency which can be observed
            # in the total energy/orbital energy around 4th decimal place.
            # The Cholesky (CD) treatment is therefore abandoned here in
            # favour of the eigen-decomposition for better numerical
            # stability.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                      uniq_kptji_id, w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            keep = w > mydf.linear_dep_threshold
            v = v[:, keep].T.conj()
            v /= numpy.sqrt(w[keep]).reshape(-1, 1)
            # From here on j2c is the projector applied to the 3-index
            # integrals: conjugated eigenvectors scaled by 1/sqrt(eigenvalue).
            j2c = v

            if is_zero(kpt):  # kpti == kptj
                aosym = 's2'
                nao_pair = nao * (nao + 1) // 2
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1,
                                      kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
            else:
                aosym = 's1'
                nao_pair = nao**2

            mem_now = lib.current_memory()[0]
            log.debug2('memory = %s', mem_now)
            max_memory = max(2000, mydf.max_memory - mem_now)
            # nkptj for 3c-coulomb arrays plus 1 Lpq array
            buflen = min(
                max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                nao_pair)
            shranges = _guess_shell_ranges(cell, buflen, aosym)
            buflen = max([x[2] for x in shranges])
            # +1 for a pqkbuf
            if aosym == 's2':
                Gblksize = max(
                    16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
            else:
                Gblksize = max(
                    16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
            Gblksize = min(Gblksize, ngrids, 16384)
            pqkRbuf = numpy.empty(buflen * Gblksize)
            pqkIbuf = numpy.empty(buflen * Gblksize)
            # buf for ft_aopair
            buf = numpy.empty((nkptj, buflen * Gblksize),
                              dtype=numpy.complex128)

            def pw_contract(istep, sh_range, j3cR, j3cI):
                """Accumulate the ft_ao pair terms into j3cR/j3cI and save."""
                bstart, bend, ncol = sh_range
                if aosym == 's2':
                    shls_slice = (bstart, bend, 0, bend)
                else:
                    shls_slice = (bstart, bend, 0, cell.nbas)

                for p0, p1 in lib.prange(0, ngrids, Gblksize):
                    dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice,
                                                aosym, b, gxyz[p0:p1], Gvbase,
                                                kpt, adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = dat[k].reshape(nG, ncol)
                        # Views on the preallocated scratch buffers.
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T

                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

                for k, ji in enumerate(adapted_ji_idx):
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        # No imaginary part was allocated for this pair.
                        v = j3cR[k]
                    else:
                        v = j3cR[k] + j3cI[k] * 1j
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

            with lib.call_in_background(pw_contract) as compute:
                col1 = 0
                for istep, sh_range in enumerate(shranges):
                    log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                               istep+1, len(shranges), *sh_range)
                    bstart, bend, ncol = sh_range
                    col0, col1 = col1, col1 + ncol
                    j3cR = []
                    j3cI = []
                    for k, idx in enumerate(adapted_ji_idx):
                        v = [
                            feri['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                            for i in range(nsegs)
                        ]
                        v = fuse(numpy.vstack(v))
                        if is_zero(kpt) and cell.dimension == 3:
                            for i, c in enumerate(vbar):
                                if c != 0:
                                    v[i] -= c * ovlp[k][col0:col1]
                        j3cR.append(numpy.asarray(v.real, order='C'))
                        if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                            j3cI.append(None)
                        else:
                            j3cI.append(numpy.asarray(v.imag, order='C'))
                        v = None
                    compute(istep, sh_range, j3cR, j3cI)

            # The first-pass intermediates of these pairs are no longer needed.
            for ji in adapted_ji_idx:
                del feri['j3c-junk/%d' % ji]

        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)

        # 'j3c-junk-kptij' is presumably written by outcore._aux_e2 next to
        # the 'j3c-junk' data -- TODO confirm.  Expose it under the final
        # name before removing the intermediates.
        feri['j3c-kptij'] = feri['j3c-junk-kptij']
        del feri['j3c-junk']