def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    """Correct the stored 'j3c' blocks for one unique k-point difference.

    This body is written as a closure: apart from its argument it reads
    names that are not defined here (uniq_kpts, uniq_inverse, kptjs, kLRs,
    kLIs, j2c, feri, fuse, fused_cell, mydf, cell, nao, naux, ngs, Gv,
    gxyz, gs, invh, log, save and, on one branch, Lpq_fake).  They must be
    provided by the enclosing scope.

    For every k-point pair whose (kptj - kpti) maps to ``uniq_kptji_id``
    the AO-pair columns are processed block by block:

    1. read the 'j3c/<idx>' column slice from ``feri`` and pass it
       through ``fuse``;
    2. add ``-0.5 * j2c[uniq_kptji_id] . Lpq`` to it;
    3. when ``kpt`` is zero, subtract ``vbar[i] * ovlp`` row by row;
    4. subtract the contributions built from ``kLR``/``kLI`` and the
       values returned by ``ft_ao._ft_aopair_kpts``;
    5. write the block back through ``save``.

    Args:
        uniq_kptji_id: index into ``uniq_kpts`` (also used to index
            ``kLRs``, ``kLIs`` and ``j2c``).

    Returns:
        None.  All results are written through ``save``.
    """
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # Indices of all (kpti, kptj) pairs that share this kptj - kpti.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        # Packed-triangular AO pairs: nao*(nao+1)/2 columns.
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        # Replace each overlap matrix by its lib.pack_tril form so it can
        # be sliced with the same col0:col1 range as the 's2' j3c columns.
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        # Full nao*nao AO pairs.
        aosym = 's1'
        nao_pair = nao**2

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    # (the 1e6 / 16 factor presumably converts MB into a count of
    # complex128 elements -- TODO confirm the unit of max_memory)
    buflen = min(
        max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
        nao_pair)
    shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
    # The third entry of every shell range is used as its column count
    # below; size the buffers for the widest block.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(
            16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(
            16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs)
    # Scratch space reused for every block and every k-point; pqkR/pqkI
    # below are views onto these buffers.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        # Column window [col0, col1) of this block in the stored datasets.
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))

            if mydf.approx_sr_level == 0:
                Lpq = numpy.asarray(feri['Lpq/%d' % idx][:, col0:col1])
            elif aosym == 's2':
                Lpq = numpy.asarray(feri['Lpq/0'][:, col0:col1])
            else:
                # Lpq_fake is not defined in this block; it must exist in
                # the enclosing scope whenever this branch is reached.
                Lpq = numpy.asarray(Lpq_fake[:, col0:col1])
            # The return value is discarded, so lib.dot is relied on to
            # update v in place (presumably v = -.5 * j2c.Lpq + 1 * v --
            # confirm against lib.dot's signature).
            lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1)
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]

            # Keep real and imaginary parts as separate C-ordered arrays.
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                # No imaginary part is tracked for this case.
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            # Drop the references to the temporaries.
            v = Lpq = None

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    # Fortran-ordered (nG, ncol) view onto row k of buf.
                    aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    # Clear the used part of buf before the next call
                    # (presumably _ft_aopair_kpts accumulates into out --
                    # TODO confirm).
                    aoao[:] = 0
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            # Number of leading AO indices covered by this block.
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ni, nao),
                                         dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                    # Move the G axis last so each AO pair is one row.
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    aoao[:] = 0
                    pqkR = pqkR.reshape(-1, nG)
                    pqkI = pqkI.reshape(-1, nG)
                    # Result is discarded, so zdotCN is relied on to
                    # update j3cR[k] / j3cI[k] in place.
                    zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                           -1, j3cR[k], j3cI[k], 1)

        # Write the corrected block back under the same dataset label.
        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                save('j3c/%d' % ji, j3cR[k], col0, col1)
            else:
                save('j3c/%d' % ji, j3cR[k] + j3cI[k] * 1j, col0, col1)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    """Correct the stored 'j3c' blocks for one unique k-point difference.

    This body is written as a closure: apart from its argument it reads
    names that are not defined here (uniq_kpts, uniq_inverse, kptjs, kLRs,
    kLIs, j2c, feri, mydf, cell, auxcell, nao, naux, ngs, Gv, gxyz, gs,
    invh, log, save and, on one branch, Lpq_fake).  They must be provided
    by the enclosing scope.

    For every k-point pair whose (kptj - kpti) maps to ``uniq_kptji_id``
    the AO-pair columns are processed block by block:

    1. read the 'j3c/<idx>' column slice from ``feri``;
    2. add ``-.5 * j2c[uniq_kptji_id] . Lpq`` to it;
    3. subtract the contributions built from ``kLR``/``kLI`` and the
       values returned by ``ft_ao._ft_aopair_kpts``;
    4. when ``kpt`` is zero, subtract ``vbar[i] * ovlp`` row by row;
    5. write the block back through ``save``.

    Args:
        uniq_kptji_id: index into ``uniq_kpts`` (also used to index
            ``kLRs``, ``kLIs`` and ``j2c``).

    Returns:
        None.  All results are written through ``save``.
    """
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # Indices of all (kpti, kptj) pairs that share this kptj - kpti.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        # Packed-triangular AO pairs: nao*(nao+1)/2 columns.
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        vbar = mydf.auxbar(auxcell)
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        # Replace each overlap matrix by its lib.pack_tril form so it can
        # be sliced with the same col0:col1 range as the 's2' j3c columns.
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        # Full nao*nao AO pairs.
        aosym = 's1'
        nao_pair = nao**2

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    # (the 1e6/16 factor presumably converts MB into a count of
    # complex128 elements -- TODO confirm the unit of max_memory)
    buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
    # The third entry of every shell range is used as its column count
    # below; size the buffers for the widest block.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngs)
    # Scratch space reused for every block and every k-point; pqkR/pqkI
    # below are views onto these buffers.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        # Column window [col0, col1) of this block in the stored datasets.
        col0, col1 = col1, col1+ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])

            if mydf.approx_sr_level == 0:
                Lpq = numpy.asarray(feri['Lpq/%d'%idx][:,col0:col1])
            elif aosym == 's2':
                Lpq = numpy.asarray(feri['Lpq/0'][:,col0:col1])
            else:
                # Lpq_fake is not defined in this block; it must exist in
                # the enclosing scope whenever this branch is reached.
                Lpq = numpy.asarray(Lpq_fake[:,col0:col1])
            # The return value is discarded, so lib.dot is relied on to
            # update v in place (presumably v = -.5 * j2c.Lpq + 1 * v --
            # confirm against lib.dot's signature).
            lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1)
            # Keep real and imaginary parts as separate C-ordered arrays.
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                # No imaginary part is tracked for this case.
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            # Drop the references to the temporaries.
            v = Lpq = None

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    # Fortran-ordered (nG, ncol) view onto row k of buf.
                    aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    # Clear the used part of buf before the next call
                    # (presumably _ft_aopair_kpts accumulates into out --
                    # TODO confirm).
                    aoao[:] = 0
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            # Number of leading AO indices covered by this block.
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                         order='F', buffer=buf[k])
                    pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                    # Move the G axis last so each AO pair is one row.
                    pqkR[:] = aoao.real.transpose(1,2,0)
                    pqkI[:] = aoao.imag.transpose(1,2,0)
                    aoao[:] = 0
                    pqkR = pqkR.reshape(-1,nG)
                    pqkI = pqkI.reshape(-1,nG)
                    # Result is discarded, so zdotCN is relied on to
                    # update j3cR[k] / j3cI[k] in place.
                    zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                           -1, j3cR[k], j3cI[k], 1)

        if is_zero(kpt):
            # Subtract vbar[i] * overlap from row i of the accumulated
            # blocks.  The imaginary part is only touched when it is
            # tracked (j3cI[k] is None for the gamma-point case above).
            for k, ji in enumerate(adapted_ji_idx):
                if gamma_point(adapted_kptjs[k]):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            j3cR[k][i] -= c * ovlp[k][col0:col1].real
                else:
                    for i, c in enumerate(vbar):
                        if c != 0:
                            j3cR[k][i] -= c * ovlp[k][col0:col1].real
                            j3cI[k][i] -= c * ovlp[k][col0:col1].imag

        # Write the corrected block back under the same dataset label.
        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                save('j3c/%d'%ji, j3cR[k], col0, col1)
            else:
                save('j3c/%d'%ji, j3cR[k]+j3cI[k]*1j, col0, col1)
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    """Build the 'j3c' datasets in ``mydf._cderi`` and correct them in place.

    Steps, as visible in this function:

    1. ``outcore.aux_e2`` is called for ``cell`` and the fused auxiliary
       cell with integral name 'cint3c2e_sph', target ``mydf._cderi`` and
       dataset name 'j3c'.
    2. For every unique difference ``kptj - kpti`` the two-centre matrix
       ``j2c`` has the products of ``kLR``/``kLI`` (``ft_ao.ft_ao`` values
       scaled by ``sqrt(coulG / cell.vol)``) subtracted from it.
    3. The file is reopened and, per unique difference, ``make_kpt``
       reads each 'j3c/<idx>' column block, applies the ``j2c . Lpq``,
       ``vbar * ovlp`` and G-space corrections and writes it back.

    Args:
        mydf: density-fitting object; this function reads ``stdout``,
            ``verbose``, ``max_memory``, ``auxcell``, ``_cderi``, ``gs``,
            ``approx_sr_level`` and calls ``auxbar``.
        cell: the cell whose AO integrals are evaluated.
        auxcell: only used for ``naux = auxcell.nao_nr()``.
            NOTE(review): the fused cell is built from ``mydf.auxcell``,
            not from this argument -- confirm they are meant to agree.
        kptij_lst: array indexed as ``[:, 0]`` (kpti) and ``[:, 1]``
            (kptj).

    Returns:
        None.  Results live in the HDF5 file ``mydf._cderi``.
    """
    # NOTE(review): time.clock() no longer exists on Python >= 3.8.  It is
    # kept because the value is handed to log.timer_debug1, whose expected
    # clock is not visible from here.
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c',
                   max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)
    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    # Integer grid indices 0..gs, -gs..-1 along each of the three axes.
    gxyz = lib.cartesian_prod(
        (numpy.append(range(gs[0] + 1), range(-gs[0], 0)),
         numpy.append(range(gs[1] + 1), range(-gs[1], 0)),
         numpy.append(range(gs[2] + 1), range(-gs[2], 0))))
    invh = numpy.linalg.inv(cell._h)
    Gv = 2 * numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, invh, gxyz, gs, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(tools.get_coulG(cell, kpt, gs=gs) / cell.vol)
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        # Force C-contiguous storage for the dot products below.
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)

        # Apply fuse along both axes of the two-centre matrix.
        j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c[k] -= lib.dot(kLR.T, kLR)
            j2c[k] -= lib.dot(kLI.T, kLI)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c[k] -= j2cR + j2cI * 1j

        # Second factor of coulG: kLR/kLI now carry coulG**2 in total.
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        # Drop the references to the temporaries.
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None

    # The file is written to below, so request read/write access
    # explicitly: h5py >= 3 opens read-only when no mode is given, whereas
    # older releases defaulted to append mode.
    feri = h5py.File(mydf._cderi, 'a')

    def save(label, dat, col0, col1):
        """Store ``dat`` into rows [0, nrow) and columns [col0, col1)."""
        nrow = dat.shape[0]
        feri[label][:nrow, col0:col1] = dat

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        """Correct every 'j3c' dataset belonging to one unique kptj-kpti."""
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        # Indices of all (kpti, kptj) pairs that share this kptj - kpti.
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1,
                                  kpts=adapted_kptjs)
            # Pack each overlap so it can be sliced like the 's2' columns.
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs)
        # Scratch buffers reused for every block; pqkR/pqkI are views.
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))

                if mydf.approx_sr_level == 0:
                    Lpq = numpy.asarray(feri['Lpq/%d' % idx][:, col0:col1])
                elif aosym == 's2':
                    Lpq = numpy.asarray(feri['Lpq/0'][:, col0:col1])
                else:
                    # NOTE(review): Lpq_fake is only assigned when
                    # len(kptij_lst) > 1; confirm this branch cannot be
                    # reached otherwise.
                    Lpq = numpy.asarray(Lpq_fake[:, col0:col1])
                # Result discarded: lib.dot is relied on to update v in
                # place.
                lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1)
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]

                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    # No imaginary part is tracked for this case.
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = Lpq = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          invh, gxyz[p0:p1], gs, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        # Clear the used part of buf before the next call.
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          invh, gxyz[p0:p1], gs, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        # Move the G axis last so each AO pair is one row.
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k], j3cI[k], 1)

            # Write the corrected block back under the same label.
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    save('j3c/%d' % ji, j3cR[k], col0, col1)
                else:
                    save('j3c/%d' % ji, j3cR[k] + j3cI[k] * 1j, col0, col1)

    # Everything that touches the open file runs under try/finally so the
    # handle is released even when a step raises.
    try:
        # Expand approx Lpq for aosym='s1'.  The approx Lpq are all in
        # aosym='s2' mode
        if mydf.approx_sr_level > 0 and len(kptij_lst) > 1:
            Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao)

        for k in range(len(uniq_kpts)):
            make_kpt(k)
    finally:
        feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    """Build the "j3c" datasets in ``mydf._cderi`` and correct them in place.

    Steps, as visible in this function:

    1. ``outcore.aux_e2`` is called for ``cell`` and the fused auxiliary
       cell with integral name "cint3c2e_sph", target ``mydf._cderi`` and
       dataset name "j3c".
    2. For every unique difference ``kptj - kpti`` the two-centre matrix
       ``j2c`` has the products of ``kLR``/``kLI`` (``ft_ao.ft_ao`` values
       scaled by ``sqrt(coulG / cell.vol)``) subtracted from it.
    3. The file is reopened and, per unique difference, ``make_kpt``
       reads each "j3c/<idx>" column block, applies the ``j2c . Lpq``,
       ``vbar * ovlp`` and G-space corrections and writes it back.

    Args:
        mydf: density-fitting object; this function reads ``stdout``,
            ``verbose``, ``max_memory``, ``auxcell``, ``_cderi``, ``gs``,
            ``approx_sr_level`` and calls ``auxbar``.
        cell: the cell whose AO integrals are evaluated.
        auxcell: only used for ``naux = auxcell.nao_nr()``.
            NOTE(review): the fused cell is built from ``mydf.auxcell``,
            not from this argument -- confirm they are meant to agree.
        kptij_lst: array indexed as ``[:, 0]`` (kpti) and ``[:, 1]``
            (kptj).

    Returns:
        None.  Results live in the HDF5 file ``mydf._cderi``.
    """
    # NOTE(review): time.clock() no longer exists on Python >= 3.8.  It is
    # kept because the value is handed to log.timer_debug1, whose expected
    # clock is not visible from here.
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell_(mydf, mydf.auxcell)
    outcore.aux_e2(
        cell, fused_cell, mydf._cderi, "cint3c2e_sph", kptij_lst=kptij_lst, dataname="j3c", max_memory=max_memory
    )
    t1 = log.timer_debug1("3c2e", *t1)
    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    # Integer grid indices 0..gs, -gs..-1 along each of the three axes.
    gxyz = lib.cartesian_prod(
        (
            numpy.append(range(gs[0] + 1), range(-gs[0], 0)),
            numpy.append(range(gs[1] + 1), range(-gs[1], 0)),
            numpy.append(range(gs[2] + 1), range(-gs[2], 0)),
        )
    )
    invh = numpy.linalg.inv(cell._h)
    Gv = 2 * numpy.pi * numpy.dot(gxyz, invh)
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor("cint2c2e_sph", hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, invh, gxyz, gs, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(tools.get_coulG(cell, kpt, gs=gs) / cell.vol)
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        # Force C-contiguous storage for the dot products below.
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)

        # Apply fuse along both axes of the two-centre matrix.
        j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c[k] -= lib.dot(kLR.T, kLR)
            j2c[k] -= lib.dot(kLI.T, kLI)
        else:
            # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c[k] -= j2cR + j2cI * 1j

        # Second factor of coulG: kLR/kLI now carry coulG**2 in total.
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        # Drop the references to the temporaries.
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None

    # The file is written to below, so request read/write access
    # explicitly: h5py >= 3 opens read-only when no mode is given, whereas
    # older releases defaulted to append mode.
    feri = h5py.File(mydf._cderi, "a")

    def save(label, dat, col0, col1):
        """Store ``dat`` into rows [0, nrow) and columns [col0, col1)."""
        nrow = dat.shape[0]
        feri[label][:nrow, col0:col1] = dat

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        """Correct every "j3c" dataset belonging to one unique kptj-kpti."""
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1("kpt = %s", kpt)
        # Indices of all (kpti, kptj) pairs that share this kptj - kpti.
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1("adapted_ji_idx = %s", adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = "s2"
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor("cint1e_ovlp_sph", hermi=1, kpts=adapted_kptjs)
            # Pack each overlap so it can be sliced like the "s2" columns.
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = "s1"
            nao_pair = nao ** 2

        max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * 0.6 * 1e6 / 16 / naux / (nkptj + 1)), 1), nao_pair)
        shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == "s2":
            Gblksize = max(16, int(max_memory * 0.2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * 0.4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs)
        # Scratch buffers reused for every block; pqkR/pqkI are views.
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1("int3c2e [%d/%d], AO [%d:%d], ncol = %d", istep + 1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri["j3c/%d" % idx][:, col0:col1]))

                if mydf.approx_sr_level == 0:
                    Lpq = numpy.asarray(feri["Lpq/%d" % idx][:, col0:col1])
                elif aosym == "s2":
                    Lpq = numpy.asarray(feri["Lpq/0"][:, col0:col1])
                else:
                    # NOTE(review): Lpq_fake is only assigned when
                    # len(kptij_lst) > 1; confirm this branch cannot be
                    # reached otherwise.
                    Lpq = numpy.asarray(Lpq_fake[:, col0:col1])
                # Result discarded: lib.dot is relied on to update v in
                # place.
                lib.dot(j2c[uniq_kptji_id], Lpq, -0.5, v, 1)
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]

                j3cR.append(numpy.asarray(v.real, order="C"))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    # No imaginary part is tracked for this case.
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order="C"))
                v = Lpq = None

            if aosym == "s2":
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(
                        cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf
                    )
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128, order="F", buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        # Clear the used part of buf before the next call.
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(
                        cell, Gv[p0:p1], shls_slice, aosym, invh, gxyz[p0:p1], gs, kpt, adapted_kptjs, out=buf
                    )
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128, order="F", buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        # Move the G axis last so each AO pair is one row.
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1, j3cR[k], j3cI[k], 1)

            # Write the corrected block back under the same label.
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    save("j3c/%d" % ji, j3cR[k], col0, col1)
                else:
                    save("j3c/%d" % ji, j3cR[k] + j3cI[k] * 1j, col0, col1)

    # Everything that touches the open file runs under try/finally so the
    # handle is released even when a step raises.
    try:
        # Expand approx Lpq for aosym='s1'.  The approx Lpq are all in
        # aosym='s2' mode
        if mydf.approx_sr_level > 0 and len(kptij_lst) > 1:
            Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao)

        for k in range(len(uniq_kpts)):
            make_kpt(k)
    finally:
        feri.close()