Beispiel #1
0
def half_e1(mol, mo_coeffs, swapfile,
            intor='int2e_spinor', aosym='s4', comp=None,
            max_memory=MAX_MEMORY, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            ao2mopt=None):
    '''Half-transform spinor AO integrals (first index pair -> MO) and write
    the result to an HDF5 swap file.

    Args:
        mol : molecule object; supplies ``_atm``, ``_bas``, ``_env``,
            ``ao_loc_2c()`` and ``tmap()``.
        mo_coeffs : sequence whose first two items are 2D coefficient arrays
            of shape (nao, nmo_i) and (nao, nmo_j).
        swapfile : path of the HDF5 file to create (opened with mode 'w').
        intor : integral name handed to ``_ao2mo.AO2MOpt`` and
            ``_ao2mo.r_e1``.
        aosym : AO symmetry code, normalised by ``outcore._stand_sym_code``.
        comp : number of integral components.  ``None`` is treated as 1.
        max_memory, ioblk_size : sizing hints forwarded to
            ``guess_e1bufsize`` / ``guess_e2bufsize``.
        verbose : verbosity level or logger accepted by
            ``logger.new_logger``.
        ao2mopt : optional pre-built optimizer; ``_ao2mo.AO2MOpt(mol, intor)``
            is created when omitted.

    Returns:
        ``swapfile``.  For every component ``icomp`` and shell-range step
        ``istep`` the file holds a complex128 dataset ``'icomp/istep'`` of
        shape (nmo_i*nmo_j, buflen).
    '''
    # time.clock() no longer exists on Python >= 3.8; only fall back to it
    # on interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mol, verbose)

    # The buffer arithmetic and the component loops below need an integer.
    if comp is None:
        comp = 1

    ijsame = iden_coeffs(mo_coeffs[0], mo_coeffs[1])

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = outcore._stand_sym_code(aosym)
    if aosym in ('s1', 's2kl', 'a2kl'):
        nao_pair = nao * nao
    else:
        nao_pair = _count_naopair(mol, nao)
    nij_pair = nmoi * nmoj

    if ijsame and aosym in ('s4', 's2ij', 'a2ij', 'a4ij', 'a4kl', 'a4'):
        # Both coefficient sets are identical: pass them only once.
        log.debug('i-mo == j-mo')
        moij = numpy.asarray(mo_coeffs[0], order='F')
        ijshape = (0, nmoi, 0, nmoi)
    else:
        moij = numpy.asarray(numpy.hstack((mo_coeffs[0], mo_coeffs[1])),
                             order='F')
        ijshape = (0, nmoi, nmoi, nmoi+nmoj)

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # The buffer to hold AO integrals in C code
    aobuflen = int((mem_words - iobuf_words) // (nao*nao*comp))
    # NOTE(review): the symmetry tuple used here ('s1', 's2ij', 'a2ij')
    # differs from the one used for nao_pair above ('s1', 's2kl', 'a2kl');
    # confirm which one is intended.
    shranges = outcore.guess_shell_ranges(mol, (aosym not in ('s1', 's2ij', 'a2ij')),
                                          aobuflen, e1buflen, mol.ao_loc_2c(), False)
    if ao2mopt is None:
        # Default optimizer: built from the integral name only.
        ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    log.debug('step1: tmpfile %.8g MB', nij_pair*nao_pair*16/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*16/1e6, iobuf_words*16/1e6)

    # The context manager closes the swap file even if a step raises.
    with h5py.File(swapfile, 'w') as fswap:
        for icomp in range(comp):
            fswap.create_group(str(icomp))  # for h5py old version

        tao = numpy.asarray(mol.tmap(), dtype=numpy.int32)

        # transform e1
        ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
        nstep = len(shranges)
        for istep, sh_range in enumerate(shranges):
            log.debug('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                      istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            # numpy.complex was only an alias of the builtin complex and is
            # gone from recent NumPy; complex128 is the same dtype.
            iobuf = numpy.empty((comp, buflen, nij_pair),
                                dtype=numpy.complex128)
            nmic = len(sh_range[3])
            p0 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug1('      fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                           imic+1, nmic, *aoshs)
                buf = _ao2mo.r_e1(intor, moij, ijshape, aoshs,
                                  mol._atm, mol._bas, mol._env,
                                  tao, aosym, comp, ao2mopt)
                iobuf[:, p0:p0+aoshs[2]] = buf
                p0 += aoshs[2]
            ti2 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            # Only the row-block length is used; datasets are written
            # unchunked (chunks=None).
            e2buflen, _ = guess_e2bufsize(ioblk_size, nij_pair, buflen)
            for icomp in range(comp):
                dset = fswap.create_dataset('%d/%d'%(icomp,istep),
                                            (nij_pair, iobuf.shape[1]), 'c16',
                                            chunks=None)
                # Stored transposed: MO pair index first, AO pair second.
                for col0, col1 in prange(0, nij_pair, e2buflen):
                    dset[col0:col1] = lib.transpose(iobuf[icomp, :, col0:col1])
            ti0 = log.timer('transposing to disk', *ti2)
    return swapfile
Beispiel #2
0
def trans_e1_outcore(mol,
                     mo,
                     ncore,
                     ncas,
                     erifile,
                     max_memory=None,
                     level=1,
                     verbose=logger.WARN):
    '''Out-of-core integral transformation producing the 'papa' and 'ppaa'
    MO integral blocks plus core Coulomb/exchange matrices.

    Args:
        mol : molecule object; supplies ``_atm``, ``_bas``, ``_env``,
            ``ao_loc_nr()``, ``_add_suffix()`` and ``stdout``.
        mo : 2D MO coefficient array of shape (nao, nmo).
        ncore : number of core orbitals.
        ncas : number of active orbitals.
        erifile : path of the HDF5 output file (created with mode 'w').
        max_memory : memory budget in MB.  Must be a number: the default
            ``None`` fails in the buffer-size arithmetic below.
        level : when 1, ``j_pc`` and ``k_pc`` are accumulated; for any other
            value they are returned as zeros.
        verbose : ``logger.Logger`` instance or verbosity level.

    Returns:
        Tuple ``(j_pc, k_pc)``, each an array of shape (nmo, ncore).

    Side effects:
        ``erifile`` receives the float64 datasets ``'papa'`` of shape
        (nmo, ncas, nmo, ncas) and ``'ppaa'`` of shape (nmo, nmo, ncas, ncas).
    '''
    # time.clock() no longer exists on Python >= 3.8; only fall back to it
    # on interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    log.debug1('trans_e1_outcore level %d  max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao * (nao + 1) // 2
    nocc = ncore + ncas

    # The scratch file must be opened writable explicitly: h5py >= 3 opens
    # read-only when no mode is given.  The context managers close both HDF5
    # files and remove the temporary file even if a step raises.
    with tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) as _tmpfile1, \
            h5py.File(_tmpfile1.name, 'w') as faapp_buf, \
            h5py.File(erifile, 'w') as feri:
        mo_c = numpy.asarray(mo, order='C')
        mo = numpy.asarray(mo, order='F')
        pashape = (0, nmo, ncore, nocc)
        papa_buf = numpy.zeros((nao, ncas, nmo * ncas))
        j_pc = numpy.zeros((nmo, ncore))
        k_pc = numpy.zeros((nmo, ncore))

        # Words (8 bytes) left after papa_buf, with a floor of 2000 MB.
        mem_words = int(max(2000, max_memory - papa_buf.nbytes / 1e6) * 1e6 / 8)
        aobuflen = mem_words // (nao_pair + nocc * nmo) + 1
        ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
        shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
        intor = mol._add_suffix('int2e')
        ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        nstep = len(shranges)
        maxbuflen = max(x[2] for x in shranges)
        log.debug('mem_words %.8g MB, maxbuflen = %d', mem_words * 8 / 1e6,
                  maxbuflen)
        # Work buffers are sized for the largest shell range and sliced
        # per step.
        bufs1 = numpy.empty((maxbuflen, nao_pair))
        bufs2 = numpy.empty((maxbuflen, nmo * ncas))
        if level == 1:
            bufs3 = numpy.empty((maxbuflen, nao * ncore))
            log.debug('mem cache %.8g MB',
                      (bufs1.nbytes + bufs2.nbytes + bufs3.nbytes) / 1e6)
        else:
            log.debug('mem cache %.8g MB', (bufs1.nbytes + bufs2.nbytes) / 1e6)
        ti0 = log.timer('Initializing trans_e1_outcore', *time0)

        # fmmm, ftrans, fdrv for level 1
        fmmm = libmcscf.AO2MOmmm_ket_nr_s2
        ftrans = libmcscf.AO2MOtranse1_nr_s4
        fdrv = libmcscf.AO2MOnr_e2_drv
        for istep, sh_range in enumerate(shranges):
            log.debug('[%d/%d], AO [%d:%d], len(buf) = %d', istep + 1, nstep,
                      *sh_range)
            buf = bufs1[:sh_range[2]]
            _ao2mo.nr_e1fill(intor, sh_range, mol._atm, mol._bas, mol._env,
                             's4', 1, ao2mopt, buf)
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('AO integrals buffer', *ti0)
            bufpa = bufs2[:sh_range[2]]
            _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
            # jc_pp, kc_pp
            if level == 1:  # ppaa, papa and vhf, jcp, kcp
                if log.verbose >= logger.DEBUG1:
                    ti1 = log.timer('buffer-pa', *ti1)
                buf1 = bufs3[:sh_range[2]]
                fdrv(ftrans, fmmm, buf1.ctypes.data_as(ctypes.c_void_p),
                     buf.ctypes.data_as(ctypes.c_void_p),
                     mo.ctypes.data_as(ctypes.c_void_p),
                     ctypes.c_int(sh_range[2]),
                     ctypes.c_int(nao), (ctypes.c_int * 4)(0, nao, 0, ncore),
                     ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
                p0 = 0
                for ij in range(sh_range[0], sh_range[1]):
                    i, j = _ao2mo._extract_pair(ij)
                    i0 = ao_loc[i]
                    j0 = ao_loc[j]
                    i1 = ao_loc[i + 1]
                    j1 = ao_loc[j + 1]
                    di = i1 - i0
                    dj = j1 - j0
                    if i == j:
                        # Diagonal shell pair: rows hold the lower triangle
                        # only, so expand them to a full (di, di) block.
                        dij = di * (di + 1) // 2
                        buf = numpy.empty((di, di, nao * ncore))
                        idx = numpy.tril_indices(di)
                        buf[idx] = buf1[p0:p0 + dij]
                        buf[idx[1], idx[0]] = buf1[p0:p0 + dij]
                        buf = buf.reshape(di, di, nao, ncore)
                        mo1 = mo_c[i0:i1]
                        tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                        tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                        j_pc += numpy.einsum('vp,pvc->pc', mo1,
                                             tmp.reshape(nmo, di, ncore))
                        tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                        tmp = lib.dot(tmp.reshape(-1, nmo),
                                      mo).reshape(di, ncore, nmo)
                        k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                    else:
                        # Off-diagonal shell pair: the (j, i) contribution is
                        # added explicitly (factor 2 for J, second term for K).
                        dij = di * dj
                        buf = buf1[p0:p0 + dij].reshape(di, dj, nao, ncore)
                        mo1 = mo_c[i0:i1]
                        mo2 = mo_c[j0:j1]
                        tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                        tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                        j_pc += numpy.einsum('vp,pvc->pc', mo2,
                                             tmp.reshape(nmo, dj, ncore)) * 2
                        tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                        tmp = lib.dot(tmp.reshape(-1, nmo),
                                      mo).reshape(dj, ncore, nmo)
                        k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                        tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:, :ncore])
                        tmp = lib.dot(tmp.reshape(-1, nmo),
                                      mo).reshape(di, ncore, nmo)
                        k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                    p0 += dij
                if log.verbose >= logger.DEBUG1:
                    ti1 = log.timer('j_cp and k_cp', *ti1)

            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('half transformation of the buffer', *ti1)

            # ppaa, papa
            # Keep the active-active slice of this step in the scratch file;
            # it is re-assembled for the 'ppaa' pass below.
            faapp_buf[str(istep)] = \
                    bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc].reshape(-1,ncas**2).T
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i + 1]
                j1 = ao_loc[j + 1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    dij = di * (di + 1) // 2
                    buf1 = numpy.empty((di, di, nmo * ncas))
                    idx = numpy.tril_indices(di)
                    buf1[idx] = bufpa[p0:p0 + dij]
                    buf1[idx[1], idx[0]] = bufpa[p0:p0 + dij]
                else:
                    dij = di * dj
                    buf1 = bufpa[p0:p0 + dij].reshape(di, dj, -1)
                    mo1 = mo[j0:j1, ncore:nocc].copy()
                    # Loop variable reuses the name `i`; the shell index is
                    # not needed again in this iteration.
                    for i in range(di):
                        lib.dot(mo1.T, buf1[i], 1, papa_buf[i0 + i], 1)
                mo1 = mo[i0:i1, ncore:nocc].copy()
                buf1 = lib.dot(mo1.T, buf1.reshape(di, -1))
                papa_buf[j0:j1] += buf1.reshape(ncas, dj, -1).transpose(1, 0, 2)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('ppaa and papa buffer', *ti1)

            ti0 = log.timer('gen AO/transform MO [%d/%d]' % (istep + 1, nstep),
                            *ti0)
        # Drop the references to the large work buffers before pass 2.
        buf = buf1 = bufpa = None
        bufs1 = bufs2 = bufs3 = None
        time1 = log.timer('mc_ao2mo pass 1', *time0)

        log.debug1('Half transformation done. Current memory %d',
                   lib.current_memory()[0])

        nblk = int(
            max(8,
                min(nmo,
                    (max_memory * 1e6 / 8 - papa_buf.size) / (ncas**2 * nmo))))
        log.debug1('nblk for papa = %d', nblk)
        dset = feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
        for i0, i1 in prange(0, nmo, nblk):
            tmp = lib.dot(mo[:, i0:i1].T, papa_buf.reshape(nao, -1))
            dset[i0:i1] = tmp.reshape(i1 - i0, ncas, nmo, ncas)
        papa_buf = tmp = None
        time1 = log.timer('papa pass 2', *time1)

        # Re-assemble the per-step active-active slices into one array.
        tmp = numpy.empty((ncas**2, nao_pair))
        p0 = 0
        for istep, sh_range in enumerate(shranges):
            tmp[:, p0:p0 + sh_range[2]] = faapp_buf[str(istep)]
            p0 += sh_range[2]
        nblk = int(
            max(8, min(nmo,
                       (max_memory * 1e6 / 8 - tmp.size) / (ncas**2 * nmo) - 1)))
        log.debug1('nblk for ppaa = %d', nblk)
        dset = feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
        for i0, i1 in prange(0, nmo, nblk):
            tmp1 = _ao2mo.nr_e2(tmp,
                                mo, (i0, i1, 0, nmo),
                                's4',
                                's1',
                                ao_loc=ao_loc)
            tmp1 = tmp1.reshape(ncas, ncas, i1 - i0, nmo)
            for j in range(i1 - i0):
                dset[i0 + j] = tmp1[:, :, j].transpose(2, 0, 1)
        tmp = tmp1 = None
        time1 = log.timer('ppaa pass 2', *time1)

    time0 = log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
Beispiel #3
0
def half_e1(mol, mo_coeffs, swapfile,
            intor='cint2e', aosym='s4', comp=1,
            max_memory=4000, ioblk_size=256, verbose=logger.WARN, ao2mopt=None):
    '''Half-transform AO integrals (first index pair -> MO) with complex
    coefficients and write the result to an HDF5 swap file.

    Args:
        mol : molecule object; supplies ``_atm``, ``_bas``, ``_env``,
            ``ao_loc_2c()``, ``tmap()`` and ``stdout``.
        mo_coeffs : sequence whose first two items are 2D coefficient arrays
            of shape (nao, nmo_i) and (nao, nmo_j).
        swapfile : path of the HDF5 file to create (opened with mode 'w').
        intor : integral name handed to ``_ao2mo.AO2MOpt`` and
            ``_ao2mo.r_e1``.
        aosym : AO symmetry code, normalised by ``_stand_sym_code``.
        comp : number of integral components.
        max_memory, ioblk_size : sizing hints forwarded to
            ``guess_e1bufsize`` / ``guess_e2bufsize``.
        verbose : ``logger.Logger`` instance or verbosity level.
        ao2mopt : optional pre-built optimizer; ``_ao2mo.AO2MOpt(mol, intor)``
            is created when omitted.

    Returns:
        ``swapfile``.  For every component ``icomp`` and shell-range step
        ``istep`` the file holds a complex128 dataset ``'icomp/istep'`` of
        shape (nmo_i*nmo_j, buflen).
    '''
    # time.clock() no longer exists on Python >= 3.8; only fall back to it
    # on interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    ijsame = iden_coeffs(mo_coeffs[0], mo_coeffs[1])

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s1', 's2kl', 'a2kl'):
        nao_pair = nao * nao
    else:
        nao_pair = guess_nao_pair(mol, nao)
    nij_pair = nmoi * nmoj

    if ijsame and aosym in ('s4', 's2ij', 'a2ij', 'a4ij', 'a4kl', 'a4'):
        # Both coefficient sets are identical: pass them only once.
        log.debug('i-mo == j-mo')
        moij = numpy.asarray(mo_coeffs[0], order='F')
        ijshape = (0, nmoi, 0, nmoi)
    else:
        moij = numpy.asarray(numpy.hstack((mo_coeffs[0], mo_coeffs[1])),
                             order='F')
        ijshape = (0, nmoi, nmoi, nmoi+nmoj)

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # The buffer to hold AO integrals in C code
    aobuflen = int((mem_words - iobuf_words) // (nao*nao*comp))
    shranges = outcore.guess_shell_ranges(mol, (aosym not in ('s1', 's2kl', 'a2kl')),
                                          aobuflen, e1buflen, mol.ao_loc_2c(), False)
    if ao2mopt is None:
        # Default optimizer: built from the integral name only.
        ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    log.debug('step1: tmpfile %.8g MB', nij_pair*nao_pair*16/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*16/1e6, iobuf_words*16/1e6)

    # The context manager closes the swap file even if a step raises.
    with h5py.File(swapfile, 'w') as fswap:
        for icomp in range(comp):
            fswap.create_group(str(icomp))  # for h5py old version

        tao = numpy.asarray(mol.tmap(), dtype=numpy.int32)

        # transform e1
        ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
        nstep = len(shranges)
        for istep, sh_range in enumerate(shranges):
            log.debug('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                      istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            # numpy.complex was only an alias of the builtin complex and is
            # gone from recent NumPy; complex128 is the same dtype.
            iobuf = numpy.empty((comp, buflen, nij_pair),
                                dtype=numpy.complex128)
            nmic = len(sh_range[3])
            p0 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug1('      fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                           imic+1, nmic, *aoshs)
                buf = _ao2mo.r_e1(intor, moij, ijshape, aoshs,
                                  mol._atm, mol._bas, mol._env,
                                  tao, aosym, comp, ao2mopt)
                iobuf[:, p0:p0+aoshs[2]] = buf
                p0 += aoshs[2]
            ti2 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            # Only the row-block length is used; datasets are written
            # unchunked (chunks=None).
            e2buflen, _ = guess_e2bufsize(ioblk_size, nij_pair, buflen)
            for icomp in range(comp):
                dset = fswap.create_dataset('%d/%d'%(icomp,istep),
                                            (nij_pair, iobuf.shape[1]), 'c16',
                                            chunks=None)
                # Stored transposed: MO pair index first, AO pair second.
                for col0, col1 in prange(0, nij_pair, e2buflen):
                    dset[col0:col1] = pyscf.lib.transpose(iobuf[icomp, :, col0:col1])
            ti0 = log.timer('transposing to disk', *ti2)
    return swapfile
Beispiel #4
0
def trans_e1_outcore(mol, mo, ncore, ncas, erifile,
                     max_memory=None, level=1, verbose=logger.WARN):
    '''Out-of-core integral transformation producing the 'papa' and 'ppaa'
    MO integral blocks plus core Coulomb/exchange matrices.

    Args:
        mol : molecule object; supplies ``_atm``, ``_bas``, ``_env``,
            ``ao_loc_nr()`` and ``stdout``.
        mo : 2D MO coefficient array of shape (nao, nmo).
        ncore : number of core orbitals.
        ncas : number of active orbitals.
        erifile : path of the HDF5 output file (created with mode 'w').
        max_memory : memory budget in MB.  Must be a number: the default
            ``None`` fails in the buffer-size arithmetic below.
        level : when 1, ``j_pc`` and ``k_pc`` are accumulated; for any other
            value they are returned as zeros.
        verbose : ``logger.Logger`` instance or verbosity level.

    Returns:
        Tuple ``(j_pc, k_pc)``, each an array of shape (nmo, ncore).

    Side effects:
        ``erifile`` receives the float64 datasets ``'papa'`` of shape
        (nmo, ncas, nmo, ncas) and ``'ppaa'`` of shape (nmo, nmo, ncas, ncas).
    '''
    # time.clock() no longer exists on Python >= 3.8; only fall back to it
    # on interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    log.debug1('trans_e1_outcore level %d  max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao*(nao+1)//2
    nocc = ncore + ncas

    # The scratch file must be opened writable explicitly: h5py >= 3 opens
    # read-only when no mode is given.  The context managers close both HDF5
    # files and remove the temporary file even if a step raises.
    with tempfile.NamedTemporaryFile() as _tmpfile1, \
            h5py.File(_tmpfile1.name, 'w') as faapp_buf, \
            h5py.File(erifile, 'w') as feri:
        mo_c = numpy.asarray(mo, order='C')
        mo = numpy.asarray(mo, order='F')
        pashape = (0, nmo, ncore, nocc)
        papa_buf = numpy.zeros((nao,ncas,nmo*ncas))
        j_pc = numpy.zeros((nmo,ncore))
        k_pc = numpy.zeros((nmo,ncore))

        # Words (8 bytes) left after papa_buf, with a floor of 2000 MB.
        mem_words = int(max(2000,max_memory-papa_buf.nbytes/1e6)*1e6/8)
        aobuflen = mem_words//(nao_pair+nocc*nmo) + 1
        ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
        shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
        ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph',
                                 'CVHFnr_schwarz_cond', 'CVHFsetnr_direct_scf')
        nstep = len(shranges)
        maxbuflen = max(x[2] for x in shranges)
        log.debug('mem_words %.8g MB, maxbuflen = %d', mem_words*8/1e6, maxbuflen)
        # Work buffers are sized for the largest shell range and sliced
        # per step.
        bufs1 = numpy.empty((maxbuflen, nao_pair))
        bufs2 = numpy.empty((maxbuflen, nmo*ncas))
        if level == 1:
            bufs3 = numpy.empty((maxbuflen, nao*ncore))
            log.debug('mem cache %.8g MB',
                      (bufs1.nbytes+bufs2.nbytes+bufs3.nbytes)/1e6)
        else:
            log.debug('mem cache %.8g MB', (bufs1.nbytes+bufs2.nbytes)/1e6)
        ti0 = log.timer('Initializing trans_e1_outcore', *time0)

        # fmmm, ftrans, fdrv for level 1
        fmmm = _fpointer('MCSCFhalfmmm_nr_s2_ket')
        ftrans = _fpointer('AO2MOtranse1_nr_s4')
        fdrv = getattr(libmcscf, 'AO2MOnr_e2_drv')
        for istep,sh_range in enumerate(shranges):
            log.debug('[%d/%d], AO [%d:%d], len(buf) = %d',
                      istep+1, nstep, *sh_range)
            buf = bufs1[:sh_range[2]]
            _ao2mo.nr_e1fill('cint2e_sph', sh_range,
                             mol._atm, mol._bas, mol._env, 's4', 1, ao2mopt, buf)
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('AO integrals buffer', *ti0)
            bufpa = bufs2[:sh_range[2]]
            _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
            # jc_pp, kc_pp
            if level == 1: # ppaa, papa and vhf, jcp, kcp
                if log.verbose >= logger.DEBUG1:
                    ti1 = log.timer('buffer-pa', *ti1)
                buf1 = bufs3[:sh_range[2]]
                fdrv(ftrans, fmmm,
                     buf1.ctypes.data_as(ctypes.c_void_p),
                     buf.ctypes.data_as(ctypes.c_void_p),
                     mo.ctypes.data_as(ctypes.c_void_p),
                     ctypes.c_int(sh_range[2]), ctypes.c_int(nao),
                     ctypes.c_int(0), ctypes.c_int(nao),
                     ctypes.c_int(0), ctypes.c_int(ncore),
                     ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
                p0 = 0
                for ij in range(sh_range[0], sh_range[1]):
                    i,j = _ao2mo._extract_pair(ij)
                    i0 = ao_loc[i]
                    j0 = ao_loc[j]
                    i1 = ao_loc[i+1]
                    j1 = ao_loc[j+1]
                    di = i1 - i0
                    dj = j1 - j0
                    if i == j:
                        # Diagonal shell pair: rows hold the lower triangle
                        # only, so expand them to a full (di, di) block.
                        dij = di * (di+1) // 2
                        buf = numpy.empty((di,di,nao*ncore))
                        idx = numpy.tril_indices(di)
                        buf[idx] = buf1[p0:p0+dij]
                        buf[idx[1],idx[0]] = buf1[p0:p0+dij]
                        buf = buf.reshape(di,di,nao,ncore)
                        mo1 = mo_c[i0:i1]
                        tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                        tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di,-1))
                        j_pc += numpy.einsum('vp,pvc->pc', mo1, tmp.reshape(nmo,di,ncore))
                        tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:,:ncore])
                        tmp = pyscf.lib.dot(tmp.reshape(-1,nmo), mo).reshape(di,ncore,nmo)
                        k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                    else:
                        # Off-diagonal shell pair: the (j, i) contribution is
                        # added explicitly (factor 2 for J, second term for K).
                        dij = di * dj
                        buf = buf1[p0:p0+dij].reshape(di,dj,nao,ncore)
                        mo1 = mo_c[i0:i1]
                        mo2 = mo_c[j0:j1]
                        tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                        tmp = pyscf.lib.dot(mo1.T, tmp.reshape(di,-1))
                        j_pc += numpy.einsum('vp,pvc->pc',
                                             mo2, tmp.reshape(nmo,dj,ncore)) * 2
                        tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:,:ncore])
                        tmp = pyscf.lib.dot(tmp.reshape(-1,nmo), mo).reshape(dj,ncore,nmo)
                        k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                        tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:,:ncore])
                        tmp = pyscf.lib.dot(tmp.reshape(-1,nmo), mo).reshape(di,ncore,nmo)
                        k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                    p0 += dij
                if log.verbose >= logger.DEBUG1:
                    ti1 = log.timer('j_cp and k_cp', *ti1)

            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('half transformation of the buffer', *ti1)

            # ppaa, papa
            # Keep the active-active slice of this step in the scratch file;
            # it is re-assembled for the 'ppaa' pass below.
            faapp_buf[str(istep)] = \
                    bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc].reshape(-1,ncas**2).T
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i,j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i+1]
                j1 = ao_loc[j+1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    dij = di * (di+1) // 2
                    buf1 = numpy.empty((di,di,nmo*ncas))
                    idx = numpy.tril_indices(di)
                    buf1[idx] = bufpa[p0:p0+dij]
                    buf1[idx[1],idx[0]] = bufpa[p0:p0+dij]
                else:
                    dij = di * dj
                    buf1 = bufpa[p0:p0+dij].reshape(di,dj,-1)
                    mo1 = mo[j0:j1,ncore:nocc].copy()
                    # Loop variable reuses the name `i`; the shell index is
                    # not needed again in this iteration.
                    for i in range(di):
                        pyscf.lib.dot(mo1.T, buf1[i], 1, papa_buf[i0+i], 1)
                mo1 = mo[i0:i1,ncore:nocc].copy()
                buf1 = pyscf.lib.dot(mo1.T, buf1.reshape(di,-1))
                papa_buf[j0:j1] += buf1.reshape(ncas,dj,-1).transpose(1,0,2)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('ppaa and papa buffer', *ti1)

            ti0 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)
        # Drop the references to the large work buffers before pass 2.
        buf = buf1 = bufpa = None
        bufs1 = bufs2 = bufs3 = None
        time1 = log.timer('mc_ao2mo pass 1', *time0)

        log.debug1('Half transformation done. Current memory %d',
                   pyscf.lib.current_memory()[0])

        nblk = int(max(8, min(nmo, (max_memory*1e6/8-papa_buf.size)/(ncas**2*nmo))))
        log.debug1('nblk for papa = %d', nblk)
        dset = feri.create_dataset('papa', (nmo,ncas,nmo,ncas), 'f8')
        for i0, i1 in prange(0, nmo, nblk):
            tmp = pyscf.lib.dot(mo[:,i0:i1].T, papa_buf.reshape(nao,-1))
            dset[i0:i1] = tmp.reshape(i1-i0,ncas,nmo,ncas)
        papa_buf = tmp = None
        time1 = log.timer('papa pass 2', *time1)

        # Re-assemble the per-step active-active slices into one array.
        tmp = numpy.empty((ncas**2,nao_pair))
        p0 = 0
        for istep, sh_range in enumerate(shranges):
            tmp[:,p0:p0+sh_range[2]] = faapp_buf[str(istep)]
            p0 += sh_range[2]
        nblk = int(max(8, min(nmo, (max_memory*1e6/8-tmp.size)/(ncas**2*nmo)-1)))
        log.debug1('nblk for ppaa = %d', nblk)
        dset = feri.create_dataset('ppaa', (nmo,nmo,ncas,ncas), 'f8')
        for i0, i1 in prange(0, nmo, nblk):
            tmp1 = _ao2mo.nr_e2(tmp, mo, (i0,i1,0,nmo), 's4', 's1', ao_loc=ao_loc)
            tmp1 = tmp1.reshape(ncas,ncas,i1-i0,nmo)
            for j in range(i1-i0):
                dset[i0+j] = tmp1[:,:,j].transpose(2,0,1)
        tmp = tmp1 = None
        time1 = log.timer('ppaa pass 2', *time1)

    time0 = log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
Beispiel #5
0
def light_e1_outcore(mol, mo, ncore, ncas,
                     max_memory=None, approx=1, verbose=logger.WARN):
    '''In-memory variant of the first-pass transformation: builds the
    'aapp' and 'appa' MO integral blocks and, depending on ``approx``, the
    core potential.

    Args:
        mol : molecule object; supplies ``_atm``, ``_bas``, ``_env``,
            ``ao_loc_nr()``, ``nbas`` and ``stdout``.
        mo : 2D MO coefficient array of shape (nao, nmo).
        ncore : number of core orbitals.
        ncas : number of active orbitals.
        max_memory : memory budget in MB.  Must be a number: the default
            ``None`` fails in the buffer-size arithmetic below.
        approx : 1 -> per-core-orbital J/K arrays are built and ``vhf_c``,
            ``j_cp``, ``k_cp`` are returned; 2 -> only ``vhf_c`` is built
            (through ``libmcscf.MCSCFnrs4_aapp_jk`` with the core density);
            any other value -> only the integral blocks are computed and
            ``vhf_c``, ``j_cp``, ``k_cp`` are ``None``.
        verbose : ``logger.Logger`` instance or verbosity level.

    Returns:
        Tuple ``(vhf_c, j_cp, k_cp, aapp, appa)`` where ``aapp`` has shape
        (ncas, ncas, nmo, nmo) and ``appa`` has shape (ncas, nmo, nmo, ncas).
    '''
    # time.clock() no longer exists on Python >= 3.8; only fall back to it
    # on interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    nao, nmo = mo.shape
    nao_pair = nao*(nao+1)//2
    nocc = ncore + ncas
    aapp_buf = numpy.empty((nao_pair,ncas,ncas))
    appa_buf = numpy.zeros((ncas,nao,nmo*ncas))
    max_memory -= (aapp_buf.nbytes+appa_buf.nbytes) / 1e6

    mo = numpy.asarray(mo, order='F')
    # Third/fourth entries are (start, count) here: the buffer below is
    # sized with pashape[1]*pashape[3].
    pashape = (0, nmo, ncore, ncas)
    if approx == 1:
        jc = numpy.empty((nao,nao,ncore))
        kc = numpy.zeros((nao,nao,ncore))
    else:
        dm_core = numpy.dot(mo[:,:ncore], mo[:,:ncore].T) * 2
        jc = numpy.zeros((nao,nao))
        kc = numpy.zeros((nao,nao))
    max_memory -= (jc.nbytes+kc.nbytes) / 1e6

    mem_words = int(max(1000,max_memory)*1e6/8)
    aobuflen = mem_words//(nao_pair+nocc*nmo) + 1
    shranges = outcore.guess_shell_ranges(mol, aobuflen, aobuflen, 's4')
    ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph',
                             'CVHFnr_schwarz_cond', 'CVHFsetnr_direct_scf')
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    log.debug('mem cache %.8g MB', mem_words*8/1e6)
    ti0 = log.timer('Initializing light_e1_outcore', *time0)
    nstep = len(shranges)
    paapp = 0
    maxbuflen = max(x[2] for x in shranges)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, pashape[1]*pashape[3]))
    bufs3 = numpy.empty((maxbuflen, nao*ncore))

    # The C entry points do not change between steps; look them up once.
    if approx == 1:
        fmmm = _fpointer('MCSCFhalfmmm_nr_s2_ket')
        ftrans = _fpointer('AO2MOtranse1_nr_s4')
        fdrv = libmcscf.AO2MOnr_e2_drv
    elif approx == 2:
        fdrv = libmcscf.MCSCFnrs4_aapp_jk

    for istep,sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d',
                  istep+1, nstep, *(sh_range[:3]))
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill_('cint2e_sph', sh_range[:3],
                          mol._atm, mol._bas, mol._env, 's4', 1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        # jc_pp, kc_pp
        if approx == 1: # aapp, appa and vhf, jcp, kcp
            _ao2mo.nr_e1_(buf, mo, pashape, 's4', 's1', vout=bufpa)
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fdrv(ftrans, fmmm,
                 buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[2]), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(nao),
                 ctypes.c_int(0), ctypes.c_int(ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i,j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i+1]
                j1 = ao_loc[j+1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows hold the lower triangle
                    # only, so expand them to a full (di, di) block.
                    dij = di * (di+1) // 2
                    buf = numpy.empty((di,di,nao*ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0:p0+dij]
                    buf[idx[1],idx[0]] = buf1[p0:p0+dij]
                    buf = buf.reshape(di,di,nao,ncore)
                    jc[i0:i1,j0:j1] = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                    kc[j0:j1] += numpy.einsum('uvpc,uc->vpc', buf, mo[i0:i1,:ncore])
                else:
                    dij = di * dj
                    buf = buf1[p0:p0+dij].reshape(di,dj,nao,ncore)
                    jc[i0:i1,j0:j1] = numpy.einsum('uvpc,pc->uvc', buf, mo[:,:ncore])
                    jc[j0:j1,i0:i1] = jc[i0:i1,j0:j1].transpose(1,0,2)
                    kc[j0:j1] += numpy.einsum('uvpc,uc->vpc', buf, mo[i0:i1,:ncore])
                    kc[i0:i1] += numpy.einsum('uvpc,vc->upc', buf, mo[j0:j1,:ncore])
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('jc and kc buffer', *ti1)
        elif approx == 2: # aapp, appa, vhf
            fdrv(buf.ctypes.data_as(ctypes.c_void_p),
                 bufpa.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p),
                 dm_core.ctypes.data_as(ctypes.c_void_p),
                 jc.ctypes.data_as(ctypes.c_void_p),
                 kc.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(sh_range[0]), ctypes.c_int(sh_range[1]-sh_range[0]),
                 ctypes.c_int(pashape[0]), ctypes.c_int(pashape[1]),
                 ctypes.c_int(pashape[2]), ctypes.c_int(pashape[3]),
                 ao_loc.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(mol.nbas))
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('bufpa + core vj and vk', *ti1)
        else: # aapp, appa
            _ao2mo.nr_e1_(buf, mo, pashape, 's4', 's1', vout=bufpa)

        # aapp, appa
        aapp_buf[paapp:paapp+sh_range[2]] = \
                bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc]
        paapp += sh_range[2]
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i,j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i+1]
            j1 = ao_loc[j+1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di+1) // 2
                buf1 = numpy.empty((di,di,nmo*ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0:p0+dij]
                buf1[idx[1],idx[0]] = bufpa[p0:p0+dij]
                buf1 = buf1.reshape(di,-1)
            else:
                dij = di * dj
                buf1 = bufpa[p0:p0+dij].reshape(di,dj,-1)
                mo1 = mo[j0:j1,ncore:nocc].copy()
                # Loop variable reuses the name `i`; the shell index is not
                # needed again in this iteration.
                for i in range(di):
                    appa_buf[:,i0+i] += pyscf.lib.dot(mo1.T, buf1[i])
                buf1 = bufpa[p0:p0+dij].reshape(di,-1)
            mo1 = mo[i0:i1,ncore:nocc].copy()
            appa_buf[:,j0:j1] += pyscf.lib.dot(mo1.T, buf1).reshape(ncas,dj,-1)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('aapp and appa buffer', *ti1)

        ti0 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)
    bufs1 = bufs2 = bufs3 = None

    aapp_buf = pyscf.lib.transpose(aapp_buf.reshape(nao_pair,-1))
    aapp = _ao2mo.nr_e2_(aapp_buf, mo, (0,nmo,0,nmo), 's4', 's1', ao_loc=ao_loc)
    aapp = aapp.reshape(ncas,ncas,nmo,nmo)
    aapp_buf = None
    if nao == nmo:
        # Shapes coincide, so the AO buffer can be overwritten slice by slice.
        appa = appa_buf
    else:
        # Each transformed slice is (nmo, nmo*ncas); the second axis must be
        # nmo (not nao) or the assignment below cannot succeed.
        appa = numpy.empty((ncas,nmo,nmo*ncas))
    for i in range(ncas):
        appa[i] = numpy.dot(mo.T, appa_buf[i].reshape(nao,-1))
    appa = appa.reshape(ncas,nmo,nmo,ncas)
    appa_buf = None

    if approx == 1:
        vhf_c = numpy.einsum('ijc->ij', jc)*2 - numpy.einsum('ijc->ij', kc)
        vhf_c = numpy.dot(numpy.dot(mo.T, vhf_c), mo)
        # mo.T @ (nao, nao*ncore) has nmo rows, hence the leading nmo axis.
        j_cp = numpy.dot(mo.T, jc.reshape(nao,-1)).reshape(nmo,nao,ncore)
        j_cp = numpy.einsum('pj,jpi->ij', mo, j_cp)
        k_cp = numpy.dot(mo.T, kc.reshape(nao,-1)).reshape(nmo,nao,ncore)
        k_cp = numpy.einsum('pj,jpi->ij', mo, k_cp)
    elif approx == 2:
        jc = pyscf.lib.hermi_triu(jc, hermi=1, inplace=True)
        kc = pyscf.lib.hermi_triu(kc, hermi=1, inplace=True)
        vhf_c = numpy.dot(numpy.dot(mo.T, jc-kc*.5), mo)
        j_cp = k_cp = None
    else:
        # Matches the loop above, where every other approx value takes the
        # integrals-only path; avoids leaving vhf_c unbound.
        vhf_c = j_cp = k_cp = None

    time0 = log.timer('mc_ao2mo', *time0)
    return vhf_c, j_cp, k_cp, aapp, appa