def half_e1(mol, mo_coeffs, swapfile,
            intor='int2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas,
            mol._env
        mo_coeffs : sequence of two ndarrays
            The two sets of orbital coefficients, ``mo_coeffs[0]`` and
            ``mo_coeffs[1]``.  The number of AOs is taken from
            ``mo_coeffs[0].shape[0]``.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The
            transformed integrals are saved in blocks.  When a str is given,
            the file is opened with mode 'w' and closed before returning;
            otherwise the object is used as it is and left open.

    Kwargs:
        intor : str
            Name of the 2-electron integral.  Ref to
            :func:`getints_by_shell` for the complete list of available
            2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. int2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve
            performance
        verbose : int or :class:`logger.Logger`
            Print level, or an existing logger to write to.
        compact : bool
            When compact is True, depending on the four oribital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation
            symmetry, and return the "plain" MO integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve perfomance

    Returns:
        The ``swapfile`` argument, exactly as it was passed in.
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (so timings are unchanged on older interpreters) and fall back
    # to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = incore._conc_mos(
        mo_coeffs[0], mo_coeffs[1], compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = guess_e1bufsize(
        max_memory, ioblk_size, nij_pair, nao_pair, comp)
    ioblk_size = ioblk_words * 8/1e6
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = max(
        int((mem_words - 2*comp*e1buflen*nij_pair) // (nao_pair*comp)),
        IOBUF_ROW_MIN)
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')),
                                  e1buflen, aobuflen)
    if ao2mopt is None:
        if intor in ('int2e_sph', 'int2e_cart'):
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    if isinstance(swapfile, str):
        fswap = h5py.File(swapfile, 'w')
    else:
        fswap = swapfile
    for icomp in range(comp):
        fswap.create_group(str(icomp))  # for h5py old version

    log.debug('step1: tmpfile %s %.8g MB', fswap.filename,
              nij_pair*nao_pair*8/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)
    nstep = len(shranges)
    # Size the work buffers by the largest shell range actually produced.
    e1buflen = max([x[2] for x in shranges])

    e2buflen, _ = guess_e2bufsize(ioblk_size, nij_pair, e1buflen)

    def save(istep, iobuf):
        # One dataset per (component, step), written under 'icomp/istep'.
        for icomp in range(comp):
            _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                              e2buflen, None)

    def async_write(istep, iobuf, thread_io):
        # Wait for the previous write before starting the next one, so at
        # most one background writer is active at a time.
        if thread_io is not None:
            thread_io.join()
        thread_io = lib.background_thread(save, istep, iobuf)
        return thread_io

    # transform e1
    ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
    bufs1 = numpy.empty((comp*e1buflen,nao_pair))
    bufs2 = numpy.empty((comp*e1buflen,nij_pair))
    buf_write = numpy.empty_like(bufs2)
    write_handler = None
    for istep, sh_range in enumerate(shranges):
        log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                   istep+1, nstep, *(sh_range[:3]))
        buflen = sh_range[2]
        iobuf = numpy.ndarray((comp,buflen,nij_pair), buffer=bufs2)
        nmic = len(sh_range[3])
        p0 = 0
        for imic, aoshs in enumerate(sh_range[3]):
            log.debug2(' fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                       imic+1, nmic, *aoshs)
            buf = numpy.ndarray((comp*aoshs[2],nao_pair), buffer=bufs1)  # (@)
            _ao2mo.nr_e1fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                             aosym, comp, ao2mopt, out=buf)
            buf = _ao2mo.nr_e1(buf, moij, ijshape, aosym, ijmosym)
            iobuf[:,p0:p0+aoshs[2]] = buf.reshape(comp,aoshs[2],-1)
            p0 += aoshs[2]
        ti0 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep),
                               *ti0)

        write_handler = async_write(istep, iobuf, write_handler)
        # Swap the two output buffers so the next step fills the one that
        # the background writer is not reading from.
        bufs2, buf_write = buf_write, bufs2  # avoid flushing writing buffer
    write_handler.join()
    bufs1 = bufs2 = None
    if isinstance(swapfile, str):
        fswap.close()
    return swapfile
def trans_e1_outcore(mol, mo, ncore, ncas, erifile,
                     max_memory=None, level=1, verbose=logger.WARN):
    '''Out-of-core integral transformation that writes the 'papa' and 'ppaa'
    MO integral datasets to an HDF5 file.

    Args:
        mol : :class:`Mole` object
            AO integrals are generated from mol._atm, mol._bas, mol._env.
        mo : 2D ndarray
            Orbital coefficients of shape (nao, nmo).
        ncore : int
            Columns [0:ncore] of ``mo`` are used for the j_pc/k_pc
            contractions (presumably the core orbitals).
        ncas : int
            Columns [ncore:ncore+ncas] of ``mo`` are used for the 'papa' and
            'ppaa' integrals (presumably the active orbitals).
        erifile : str
            HDF5 file to create.  It is opened with mode 'w', so an existing
            file is overwritten.  On return it holds the datasets
            'papa' of shape (nmo,ncas,nmo,ncas) and
            'ppaa' of shape (nmo,nmo,ncas,ncas).

    Kwargs:
        max_memory : float or int
            Memory budget in MB.  None means 2000 MB.
        level : int
            When level is 1, j_pc and k_pc are accumulated; for any other
            value they are returned as zeros.
        verbose : int or :class:`logger.Logger`
            Print level, or an existing logger to write to.

    Returns:
        j_pc, k_pc : two ndarrays of shape (nmo, ncore)
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (so timings are unchanged on older interpreters) and fall back
    # to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    if max_memory is None:
        # The declared default cannot be used in the arithmetic below.
        # 2000 MB is the floor this function already applies when it sizes
        # the AO buffer.
        max_memory = 2000
    log.debug1('trans_e1_outcore level %d max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao*(nao+1)//2
    nocc = ncore + ncas

    _tmpfile1 = tempfile.NamedTemporaryFile()
    # Explicit mode: the implicit default became read-only in h5py 3.0, but
    # this scratch file is written to.  'w' is safe because the temporary
    # file was just created by this function and holds no data.
    faapp_buf = h5py.File(_tmpfile1.name, 'w')
    feri = h5py.File(erifile, 'w')

    mo_c = numpy.asarray(mo, order='C')
    mo = numpy.asarray(mo, order='F')
    pashape = (0, nmo, ncore, nocc)
    papa_buf = numpy.zeros((nao,ncas,nmo*ncas))
    j_pc = numpy.zeros((nmo,ncore))
    k_pc = numpy.zeros((nmo,ncore))

    mem_words = int(max(2000,max_memory-papa_buf.nbytes/1e6)*1e6/8)
    aobuflen = mem_words//(nao_pair+nocc*nmo) + 1
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
    ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph',
                             'CVHFnr_schwarz_cond', 'CVHFsetnr_direct_scf')
    nstep = len(shranges)
    maxbuflen = max([x[2] for x in shranges])
    log.debug('mem_words %.8g MB, maxbuflen = %d', mem_words*8/1e6, maxbuflen)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, nmo*ncas))
    if level == 1:
        bufs3 = numpy.empty((maxbuflen, nao*ncore))
        log.debug('mem cache %.8g MB',
                  (bufs1.nbytes+bufs2.nbytes+bufs3.nbytes)/1e6)
    else:
        log.debug('mem cache %.8g MB', (bufs1.nbytes+bufs2.nbytes)/1e6)
    ti0 = log.timer('Initializing trans_e1_outcore', *time0)

    # fmmm, ftrans, fdrv for level 1
    fmmm = _fpointer('MCSCFhalfmmm_nr_s2_ket')
    ftrans = _fpointer('AO2MOtranse1_nr_s4')
    fdrv = getattr(libmcscf, 'AO2MOnr_e2_drv')
    for istep, sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d',
                  istep+1, nstep, *sh_range)
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill('cint2e_sph', sh_range,
                         mol._atm, mol._bas, mol._env, 's4', 1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
        # jc_pp, kc_pp
        if level == 1:  # ppaa, papa and vhf, jcp, kcp
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fdrv(ftrans, fmmm,
                 buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[2]), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i+1]
                j1 = ao_loc[j+1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows hold only the lower
                    # triangle, so expand it to the full symmetric block.
                    dij = di * (di+1) // 2
                    buf = numpy.empty((di,di,nao*ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0:p0+dij]
                    buf[idx[1],idx[0]] = buf1[p0:p0+dij]
                    buf = buf.reshape(di,di,nao,ncore)
                    mo1 = mo_c[i0:i1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                    tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di,-1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo1,
                                         tmp.reshape(nmo,di,ncore))
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:,:ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1,nmo),
                                        mo).reshape(di,ncore,nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                else:
                    # Off-diagonal shell pair: the (i,j) block also stands
                    # for (j,i), hence the factor 2 for j_pc and the two
                    # separate k_pc contributions.
                    dij = di * dj
                    buf = buf1[p0:p0+dij].reshape(di,dj,nao,ncore)
                    mo1 = mo_c[i0:i1]
                    mo2 = mo_c[j0:j1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                    tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di,-1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo2,
                                         tmp.reshape(nmo,dj,ncore)) * 2
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:,:ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1,nmo),
                                        mo).reshape(dj,ncore,nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                    tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:,:ncore])
                    tmp = pyscf.lib.dot(tmp.reshape(-1,nmo),
                                        mo).reshape(di,ncore,nmo)
                    k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('j_cp and k_cp', *ti1)

        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('half transformation of the buffer', *ti1)

        # ppaa, papa
        faapp_buf[str(istep)] = \
            bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc].reshape(-1,ncas**2).T
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i+1]
            j1 = ao_loc[j+1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di+1) // 2
                buf1 = numpy.empty((di,di,nmo*ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0:p0+dij]
                buf1[idx[1],idx[0]] = bufpa[p0:p0+dij]
            else:
                dij = di * dj
                buf1 = bufpa[p0:p0+dij].reshape(di,dj,-1)
                mo1 = mo[j0:j1,ncore:nocc].copy()
                # Accumulate into the rows of papa_buf for shell i; the
                # rows for shell j are handled by the common code below.
                for k in range(di):
                    pyscf.lib.dot(mo1.T, buf1[k], 1, papa_buf[i0+k], 1)
            mo1 = mo[i0:i1,ncore:nocc].copy()
            buf1 = pyscf.lib.dot(mo1.T, buf1.reshape(di,-1))
            papa_buf[j0:j1] += buf1.reshape(ncas,dj,-1).transpose(1,0,2)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('ppaa and papa buffer', *ti1)

        ti0 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)
    # Drop the work buffers before the second pass allocates its own.
    buf = buf1 = bufpa = None
    bufs1 = bufs2 = bufs3 = None
    time1 = log.timer('mc_ao2mo pass 1', *time0)

    log.debug1('Half transformation done. Current memory %d',
               pyscf.lib.current_memory()[0])

    nblk = int(max(8, min(nmo, (max_memory*1e6/8-papa_buf.size)/(ncas**2*nmo))))
    log.debug1('nblk for papa = %d', nblk)
    dset = feri.create_dataset('papa', (nmo,ncas,nmo,ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp = pyscf.lib.dot(mo[:,i0:i1].T, papa_buf.reshape(nao,-1))
        dset[i0:i1] = tmp.reshape(i1-i0,ncas,nmo,ncas)
    papa_buf = tmp = None
    time1 = log.timer('papa pass 2', *time1)

    # Reassemble the per-step blocks stored in the scratch file.
    tmp = numpy.empty((ncas**2,nao_pair))
    p0 = 0
    for istep, sh_range in enumerate(shranges):
        tmp[:,p0:p0+sh_range[2]] = faapp_buf[str(istep)]
        p0 += sh_range[2]
    nblk = int(max(8, min(nmo, (max_memory*1e6/8-tmp.size)/(ncas**2*nmo)-1)))
    log.debug1('nblk for ppaa = %d', nblk)
    dset = feri.create_dataset('ppaa', (nmo,nmo,ncas,ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp1 = _ao2mo.nr_e2(tmp, mo, (i0,i1,0,nmo), 's4', 's1', ao_loc=ao_loc)
        tmp1 = tmp1.reshape(ncas,ncas,i1-i0,nmo)
        for j in range(i1-i0):
            dset[i0+j] = tmp1[:,:,j].transpose(2,0,1)
    tmp = tmp1 = None
    log.timer('ppaa pass 2', *time1)

    faapp_buf.close()
    feri.close()
    _tmpfile1 = None  # drop the last reference to the temporary file object
    log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
def trans_e1_outcore(mol, mo, ncore, ncas, erifile,
                     max_memory=None, level=1, verbose=logger.WARN):
    '''Out-of-core integral transformation that writes the 'papa' and 'ppaa'
    MO integral datasets to an HDF5 file.

    Args:
        mol : :class:`Mole` object
            AO integrals are generated from mol._atm, mol._bas, mol._env.
        mo : 2D ndarray
            Orbital coefficients of shape (nao, nmo).
        ncore : int
            Columns [0:ncore] of ``mo`` are used for the j_pc/k_pc
            contractions (presumably the core orbitals).
        ncas : int
            Columns [ncore:ncore+ncas] of ``mo`` are used for the 'papa' and
            'ppaa' integrals (presumably the active orbitals).
        erifile : str
            HDF5 file to create.  It is opened with mode 'w', so an existing
            file is overwritten.  On return it holds the datasets
            'papa' of shape (nmo,ncas,nmo,ncas) and
            'ppaa' of shape (nmo,nmo,ncas,ncas).

    Kwargs:
        max_memory : float or int
            Memory budget in MB.  None means 2000 MB.
        level : int
            When level is 1, j_pc and k_pc are accumulated; for any other
            value they are returned as zeros.
        verbose : int or :class:`logger.Logger`
            Print level, or an existing logger to write to.

    Returns:
        j_pc, k_pc : two ndarrays of shape (nmo, ncore)
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (so timings are unchanged on older interpreters) and fall back
    # to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    if max_memory is None:
        # The declared default cannot be used in the arithmetic below.
        # 2000 MB is the floor this function already applies when it sizes
        # the AO buffer.
        max_memory = 2000
    log.debug1('trans_e1_outcore level %d max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao * (nao + 1) // 2
    nocc = ncore + ncas

    _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit mode: the implicit default became read-only in h5py 3.0, but
    # this scratch file is written to.  'w' is safe because the temporary
    # file was just created by this function and holds no data.
    faapp_buf = h5py.File(_tmpfile1.name, 'w')
    feri = h5py.File(erifile, 'w')

    mo_c = numpy.asarray(mo, order='C')
    mo = numpy.asarray(mo, order='F')
    pashape = (0, nmo, ncore, nocc)
    papa_buf = numpy.zeros((nao, ncas, nmo * ncas))
    j_pc = numpy.zeros((nmo, ncore))
    k_pc = numpy.zeros((nmo, ncore))

    mem_words = int(max(2000, max_memory - papa_buf.nbytes / 1e6) * 1e6 / 8)
    aobuflen = mem_words // (nao_pair + nocc * nmo) + 1
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
    intor = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nstep = len(shranges)
    maxbuflen = max([x[2] for x in shranges])
    log.debug('mem_words %.8g MB, maxbuflen = %d',
              mem_words * 8 / 1e6, maxbuflen)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, nmo * ncas))
    if level == 1:
        bufs3 = numpy.empty((maxbuflen, nao * ncore))
        log.debug('mem cache %.8g MB',
                  (bufs1.nbytes + bufs2.nbytes + bufs3.nbytes) / 1e6)
    else:
        log.debug('mem cache %.8g MB', (bufs1.nbytes + bufs2.nbytes) / 1e6)
    ti0 = log.timer('Initializing trans_e1_outcore', *time0)

    # fmmm, ftrans, fdrv for level 1
    fmmm = libmcscf.AO2MOmmm_ket_nr_s2
    ftrans = libmcscf.AO2MOtranse1_nr_s4
    fdrv = libmcscf.AO2MOnr_e2_drv
    for istep, sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d',
                  istep + 1, nstep, *sh_range)
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill(intor, sh_range,
                         mol._atm, mol._bas, mol._env, 's4', 1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
        # jc_pp, kc_pp
        if level == 1:  # ppaa, papa and vhf, jcp, kcp
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fdrv(ftrans, fmmm,
                 buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[2]), ctypes.c_int(nao),
                 (ctypes.c_int * 4)(0, nao, 0, ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i + 1]
                j1 = ao_loc[j + 1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows hold only the lower
                    # triangle, so expand it to the full symmetric block.
                    dij = di * (di + 1) // 2
                    buf = numpy.empty((di, di, nao * ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0:p0 + dij]
                    buf[idx[1], idx[0]] = buf1[p0:p0 + dij]
                    buf = buf.reshape(di, di, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo1,
                                         tmp.reshape(nmo, di, ncore))
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                else:
                    # Off-diagonal shell pair: the (i,j) block also stands
                    # for (j,i), hence the factor 2 for j_pc and the two
                    # separate k_pc contributions.
                    dij = di * dj
                    buf = buf1[p0:p0 + dij].reshape(di, dj, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    mo2 = mo_c[j0:j1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo2,
                                         tmp.reshape(nmo, dj, ncore)) * 2
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(dj, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                    tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('j_cp and k_cp', *ti1)

        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('half transformation of the buffer', *ti1)

        # ppaa, papa
        faapp_buf[str(istep)] = \
            bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc].reshape(-1,ncas**2).T
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i + 1]
            j1 = ao_loc[j + 1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di + 1) // 2
                buf1 = numpy.empty((di, di, nmo * ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0:p0 + dij]
                buf1[idx[1], idx[0]] = bufpa[p0:p0 + dij]
            else:
                dij = di * dj
                buf1 = bufpa[p0:p0 + dij].reshape(di, dj, -1)
                mo1 = mo[j0:j1, ncore:nocc].copy()
                # Accumulate into the rows of papa_buf for shell i; the
                # rows for shell j are handled by the common code below.
                for k in range(di):
                    lib.dot(mo1.T, buf1[k], 1, papa_buf[i0 + k], 1)
            mo1 = mo[i0:i1, ncore:nocc].copy()
            buf1 = lib.dot(mo1.T, buf1.reshape(di, -1))
            papa_buf[j0:j1] += buf1.reshape(ncas, dj, -1).transpose(1, 0, 2)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('ppaa and papa buffer', *ti1)

        ti0 = log.timer('gen AO/transform MO [%d/%d]' % (istep + 1, nstep),
                        *ti0)
    # Drop the work buffers before the second pass allocates its own.
    buf = buf1 = bufpa = None
    bufs1 = bufs2 = bufs3 = None
    time1 = log.timer('mc_ao2mo pass 1', *time0)

    log.debug1('Half transformation done. Current memory %d',
               lib.current_memory()[0])

    nblk = int(
        max(8, min(nmo,
                   (max_memory * 1e6 / 8 - papa_buf.size) / (ncas**2 * nmo))))
    log.debug1('nblk for papa = %d', nblk)
    dset = feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp = lib.dot(mo[:, i0:i1].T, papa_buf.reshape(nao, -1))
        dset[i0:i1] = tmp.reshape(i1 - i0, ncas, nmo, ncas)
    papa_buf = tmp = None
    time1 = log.timer('papa pass 2', *time1)

    # Reassemble the per-step blocks stored in the scratch file.
    tmp = numpy.empty((ncas**2, nao_pair))
    p0 = 0
    for istep, sh_range in enumerate(shranges):
        tmp[:, p0:p0 + sh_range[2]] = faapp_buf[str(istep)]
        p0 += sh_range[2]
    nblk = int(
        max(8, min(nmo,
                   (max_memory * 1e6 / 8 - tmp.size) / (ncas**2 * nmo) - 1)))
    log.debug1('nblk for ppaa = %d', nblk)
    dset = feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp1 = _ao2mo.nr_e2(tmp, mo, (i0, i1, 0, nmo), 's4', 's1',
                            ao_loc=ao_loc)
        tmp1 = tmp1.reshape(ncas, ncas, i1 - i0, nmo)
        for j in range(i1 - i0):
            dset[i0 + j] = tmp1[:, :, j].transpose(2, 0, 1)
    tmp = tmp1 = None
    log.timer('ppaa pass 2', *time1)

    faapp_buf.close()
    feri.close()
    _tmpfile1 = None  # drop the last reference to the temporary file object
    log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
def half_e1(mol, mo_coeffs, swapfile,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas,
            mol._env
        mo_coeffs : sequence of two ndarrays
            The two sets of orbital coefficients, ``mo_coeffs[0]`` and
            ``mo_coeffs[1]``.  The number of AOs is taken from
            ``mo_coeffs[0].shape[0]``.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The
            transformed integrals are saved in blocks.  When a str is given,
            the file is opened with mode 'w' and closed before returning;
            otherwise the object is used as it is and left open.

    Kwargs:
        intor : str
            Name of the 2-electron integral.  Ref to
            :func:`getints_by_shell` for the complete list of available
            2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve
            performance
        verbose : int or :class:`logger.Logger`
            Print level, or an existing logger to write to.
        compact : bool
            When compact is True, depending on the four oribital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation
            symmetry, and return the "plain" MO integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve perfomance

    Returns:
        The ``swapfile`` argument, exactly as it was passed in.
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (so timings are unchanged on older interpreters) and fall back
    # to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = incore._conc_mos(
        mo_coeffs[0], mo_coeffs[1], compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = guess_e1bufsize(
        max_memory, ioblk_size, nij_pair, nao_pair, comp)
    ioblk_size = ioblk_words * 8/1e6
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = max(
        int((mem_words - 2*comp*e1buflen*nij_pair) // (nao_pair*comp)),
        IOBUF_ROW_MIN)
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')),
                                  e1buflen, aobuflen)
    if ao2mopt is None:
        if intor == 'cint2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    if isinstance(swapfile, str):
        fswap = h5py.File(swapfile, 'w')
    else:
        fswap = swapfile
    for icomp in range(comp):
        fswap.create_group(str(icomp))  # for h5py old version

    log.debug('step1: tmpfile %s %.8g MB', fswap.filename,
              nij_pair*nao_pair*8/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)
    nstep = len(shranges)
    # Size the work buffers by the largest shell range actually produced.
    e1buflen = max([x[2] for x in shranges])

    e2buflen, _ = guess_e2bufsize(ioblk_size, nij_pair, e1buflen)

    def save(istep, iobuf):
        # One dataset per (component, step), written under 'icomp/istep'.
        for icomp in range(comp):
            _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                              e2buflen, None)

    def async_write(istep, iobuf, thread_io):
        # Wait for the previous write before starting the next one, so at
        # most one background writer is active at a time.
        if thread_io is not None:
            thread_io.join()
        thread_io = lib.background_thread(save, istep, iobuf)
        return thread_io

    # transform e1
    ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
    bufs1 = numpy.empty((comp*e1buflen,nao_pair))
    bufs2 = numpy.empty((comp*e1buflen,nij_pair))
    buf_write = numpy.empty_like(bufs2)
    write_handler = None
    for istep, sh_range in enumerate(shranges):
        log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                   istep+1, nstep, *(sh_range[:3]))
        buflen = sh_range[2]
        iobuf = numpy.ndarray((comp,buflen,nij_pair), buffer=bufs2)
        nmic = len(sh_range[3])
        p0 = 0
        for imic, aoshs in enumerate(sh_range[3]):
            log.debug2(' fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                       imic+1, nmic, *aoshs)
            buf = numpy.ndarray((comp*aoshs[2],nao_pair), buffer=bufs1)  # (@)
            _ao2mo.nr_e1fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                             aosym, comp, ao2mopt, out=buf)
            buf = _ao2mo.nr_e1(buf, moij, ijshape, aosym, ijmosym)
            iobuf[:,p0:p0+aoshs[2]] = buf.reshape(comp,aoshs[2],-1)
            p0 += aoshs[2]
        ti0 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep),
                               *ti0)

        write_handler = async_write(istep, iobuf, write_handler)
        # Swap the two output buffers so the next step fills the one that
        # the background writer is not reading from.
        bufs2, buf_write = buf_write, bufs2  # avoid flushing writing buffer
    write_handler.join()
    bufs1 = bufs2 = None
    if isinstance(swapfile, str):
        fswap.close()
    return swapfile