Example #1
0
def kernel(mycc,
           eris=None,
           t1=None,
           t2=None,
           max_cycle=50,
           tol=1e-8,
           tolnormt=1e-6,
           verbose=None):
    log = logger.new_logger(mycc, verbose)
    cput0 = (logger.process_clock(), logger.perf_counter())
    _sync_(mycc)

    eris = getattr(mycc, '_eris', None)
    if eris is None:
        mycc.ao2mo(mycc.mo_coeff)
        eris = mycc._eris

    cput1 = (logger.process_clock(), logger.perf_counter())

    # Use the existed amplitudes as initial guess
    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if t1 is None and t2 is None:
        t1, t2 = mycc.get_init_guess(eris)
    elif t2 is None:
        t2 = mycc.get_init_guess(eris)[1]

    eold = 0
    eccsd = mycc.energy(t1, t2, eris)
    log.info('Init E(CCSD) = %.15g', eccsd)

    if isinstance(mycc.diis, diis.DistributedDIIS):
        adiis = mycc.diis
    elif mycc.diis:
        adiis = diis.DistributedDIIS(mycc, mycc.diis_file)
        adiis.space = mycc.diis_space
    else:
        adiis = None

    conv = False
    for istep in range(max_cycle):
        t1new, t2new = mycc.update_amps(t1, t2, eris)
        normt = _diff_norm(mycc, t1new, t2new, t1, t2)
        t1, t2 = t1new, t2new
        t1new = t2new = None

        t1, t2 = mycc.run_diis(t1, t2, istep, normt, eccsd - eold, adiis)
        eold, eccsd = eccsd, mycc.energy(t1, t2, eris)
        log.info('cycle = %d  E(CCSD) = %.15g  dE = %.9g  norm(t1,t2) = %.6g',
                 istep + 1, eccsd, eccsd - eold, normt)
        cput1 = log.timer('CCSD iter', *cput1)
        if abs(eccsd - eold) < tol and normt < tolnormt:
            conv = True
            break

    mycc.e_corr = eccsd
    mycc.t1 = t1
    mycc.t2 = t2
    log.timer('CCSD', *cput0)
    return conv, eccsd, t1, t2
Example #2
0
def kernel(mycc, eris=None):
    cpu0 = (time.clock(), time.time())
    ccsd._sync_(mycc)
    log = logger.new_logger(mycc)

    eris = getattr(mycc, '_eris', None)
    if eris is None:
        mycc.ao2mo(mycc.mo_coeff)
        eris = mycc._eris

    t1T = numpy.asarray(mycc.t1.T, order='C')
    nvir, nocc = t1T.shape

    fvo = eris.fock[nocc:,:nocc].copy()
    mo_energy = eris.mo_energy.copy()
    et_sum = numpy.zeros(1, dtype=t1T.dtype)
    drv = _ccsd.libcc.MPICCsd_t_contract
    cpu2 = [time.clock(), time.time()]
    def contract(slices, data):
        #vvop_ab, vvop_ac, vvop_ba, vvop_bc, vvop_ca, vvop_cb, \
        #        vooo_a, vooo_b, vooo_c, t2T_a, t2T_b, t2T_c = data
        data_ptrs = [x.ctypes.data_as(ctypes.c_void_p) for x in data]
        data_ptrs = (ctypes.c_void_p*12)(*data_ptrs)
        drv(et_sum.ctypes.data_as(ctypes.c_void_p),
            mo_energy.ctypes.data_as(ctypes.c_void_p),
            t1T.ctypes.data_as(ctypes.c_void_p),
            fvo.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(nocc), ctypes.c_int(nvir),
            (ctypes.c_int*6)(*slices), data_ptrs)
        cpu2[:] = log.alltimer_debug1('contract'+str(slices), *cpu2)

    with GlobalDataHandler(mycc) as daemon:
        v_seg_ranges = daemon.data_partition
        tasks = []
        for ka, (a0, a1) in enumerate(v_seg_ranges):
            for kb, (b0, b1) in enumerate(v_seg_ranges[:ka+1]):
                for c0, c1 in v_seg_ranges[:kb+1]:
                    tasks.append((a0, a1, b0, b1, c0, c1))
        log.debug('ntasks = %d', len(tasks))

        task_count = 0
        with lib.call_in_background(contract) as async_contract:
            #for task in mpi.static_partition(tasks):
            #for task in mpi.work_stealing_partition(tasks):
            for task in mpi.work_share_partition(tasks, loadmin=2):
                log.alldebug2('request for segment %s', task)
                data = [None] * 12
                daemon.request_(task, data)
                async_contract(task, data)
                task_count += 1
        log.alldebug1('task_count = %d', task_count)

    et = comm.allreduce(et_sum[0] * 2).real
    log.timer('CCSD(T)', *cpu0)
    log.note('CCSD(T) correction = %.15g', et)
    return et
Example #3
0
    def start(self, interval=0.02):
        mycc = self._cc
        log = logger.new_logger(mycc)
        cpu1 = (logger.process_clock(), logger.perf_counter())
        eris = mycc._eris
        t2T = mycc.t2.transpose(2, 3, 0, 1)

        nocc, nvir = mycc.t1.shape
        nmo = nocc + nvir
        vloc0, vloc1 = self.vranges[rank]
        nvir_seg = vloc1 - vloc0

        max_memory = min(24000, mycc.max_memory - lib.current_memory()[0])
        blksize = min(
            nvir_seg // 4 + 1,
            max(16, int(max_memory * .3e6 / 8 / (nvir * nocc * nmo))))
        self.eri_tmp = lib.H5TmpFile()
        vvop = self.eri_tmp.create_dataset('vvop', (nvir_seg, nvir, nocc, nmo),
                                           'f8')

        def save_vvop(j0, j1, vvvo):
            buf = numpy.empty((j1 - j0, nvir, nocc, nmo), dtype=t2T.dtype)
            buf[:, :, :, :nocc] = eris.ovov[:, j0:j1].conj().transpose(
                1, 3, 0, 2)
            for k, (q0, q1) in enumerate(self.vranges):
                blk = vvvo[k].reshape(q1 - q0, nvir, j1 - j0, nocc)
                buf[:, q0:q1, :, nocc:] = blk.transpose(2, 0, 3, 1)
            vvop[j0:j1] = buf

        with lib.call_in_background(save_vvop) as save_vvop:
            for p0, p1 in mpi.prange(vloc0, vloc1, blksize):
                j0, j1 = p0 - vloc0, p1 - vloc0
                sub_locs = comm.allgather((p0, p1))
                vvvo = mpi.alltoall_new(
                    [eris.vvvo[:, :, q0:q1] for q0, q1 in sub_locs],
                    split_recvbuf=True)
                save_vvop(j0, j1, vvvo)
                cpu1 = log.timer_debug1('transpose %d:%d' % (p0, p1), *cpu1)

        def send_data():
            while True:
                while comm.Iprobe(source=MPI.ANY_SOURCE, tag=INQUIRY):
                    tensors, dest = comm.recv(source=MPI.ANY_SOURCE,
                                              tag=INQUIRY)
                    for task, slices in tensors:
                        if task == 'Done':
                            return
                        else:
                            mpi.send(self._get_tensor(task, slices),
                                     dest,
                                     tag=TRANSFER_DATA)
                time.sleep(interval)

        daemon = threading.Thread(target=send_data)
        daemon.start()
        return daemon
Example #4
0
def kernel(mycc, eris=None, t1=None, t2=None, max_cycle=50, tol=1e-8,
           tolnormt=1e-6, verbose=None):
    log = logger.new_logger(mycc, verbose)
    cput1 = cput0 = (time.clock(), time.time())
    _sync_(mycc)

    eris = getattr(mycc, '_eris', None)
    if eris is None:
        mycc.ao2mo(mycc.mo_coeff)
        eris = mycc._eris

    # Use the existed amplitudes as initial guess
    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if t1 is None and t2 is None:
        t1, t2 = mycc.get_init_guess(eris)
    elif t2 is None:
        t2 = mycc.get_init_guess(eris)[1]

    eold = 0
    eccsd = mycc.energy(t1, t2, eris)
    log.info('Init E(CCSD) = %.15g', eccsd)

    if isinstance(mycc.diis, diis.DistributedDIIS):
        adiis = mycc.diis
    elif mycc.diis:
        adiis = diis.DistributedDIIS(mycc, mycc.diis_file)
        adiis.space = mycc.diis_space
    else:
        adiis = None

    conv = False
    for istep in range(max_cycle):
        t1new, t2new = mycc.update_amps(t1, t2, eris)
        normt = _diff_norm(mycc, t1new, t2new, t1, t2)
        t1, t2 = t1new, t2new
        t1new = t2new = None

        t1, t2 = mycc.run_diis(t1, t2, istep, normt, eccsd-eold, adiis)
        eold, eccsd = eccsd, mycc.energy(t1, t2, eris)
        log.info('cycle = %d  E(CCSD) = %.15g  dE = %.9g  norm(t1,t2) = %.6g',
                 istep+1, eccsd, eccsd - eold, normt)
        cput1 = log.timer('CCSD iter', *cput1)
        if abs(eccsd-eold) < tol and normt < tolnormt:
            conv = True
            break

    mycc.e_corr = eccsd
    mycc.t1 = t1
    mycc.t2 = t2
    log.timer('CCSD', *cput0)
    return conv, eccsd, t1, t2
Example #5
0
    def start(self, interval=0.02):
        mycc = self._cc
        log = logger.new_logger(mycc)
        cpu1 = (time.clock(), time.time())
        eris = mycc._eris
        t2T = mycc.t2.transpose(2,3,0,1)

        nocc, nvir = mycc.t1.shape
        nmo = nocc + nvir
        vloc0, vloc1 = self.vranges[rank]
        nvir_seg = vloc1 - vloc0

        max_memory = min(24000, mycc.max_memory - lib.current_memory()[0])
        blksize = min(nvir_seg//4+1, max(16, int(max_memory*.3e6/8/(nvir*nocc*nmo))))
        self.eri_tmp = lib.H5TmpFile()
        vvop = self.eri_tmp.create_dataset('vvop', (nvir_seg,nvir,nocc,nmo), 'f8')

        def save_vvop(j0, j1, vvvo):
            buf = numpy.empty((j1-j0,nvir,nocc,nmo), dtype=t2T.dtype)
            buf[:,:,:,:nocc] = eris.ovov[:,j0:j1].conj().transpose(1,3,0,2)
            for k, (q0, q1) in enumerate(self.vranges):
                blk = vvvo[k].reshape(q1-q0,nvir,j1-j0,nocc)
                buf[:,q0:q1,:,nocc:] = blk.transpose(2,0,3,1)
            vvop[j0:j1] = buf

        with lib.call_in_background(save_vvop) as save_vvop:
            for p0, p1 in mpi.prange(vloc0, vloc1, blksize):
                j0, j1 = p0 - vloc0, p1 - vloc0
                sub_locs = comm.allgather((p0,p1))
                vvvo = mpi.alltoall([eris.vvvo[:,:,q0:q1] for q0, q1 in sub_locs],
                                    split_recvbuf=True)
                save_vvop(j0, j1, vvvo)
                cpu1 = log.timer_debug1('transpose %d:%d'%(p0,p1), *cpu1)

        def send_data():
            while True:
                while comm.Iprobe(source=MPI.ANY_SOURCE, tag=INQUIRY):
                    tensors, dest = comm.recv(source=MPI.ANY_SOURCE, tag=INQUIRY)
                    for task, slices in tensors:
                        if task == 'Done':
                            return
                        else:
                            mpi.send(self._get_tensor(task, slices), dest,
                                     tag=TRANSFER_DATA)
                time.sleep(interval)

        daemon = threading.Thread(target=send_data)
        daemon.start()
        return daemon
Example #6
0
def kernel(mycc,
           eris=None,
           t1=None,
           t2=None,
           l1=None,
           l2=None,
           max_cycle=50,
           tol=1e-6,
           verbose=None,
           fintermediates=None,
           fupdate=None,
           approx_l=False):
    """
    CCSD lambda kernel.
    """
    log = logger.new_logger(mycc, verbose)
    cput0 = (logger.process_clock(), logger.perf_counter())
    _sync_(mycc)

    eris = getattr(mycc, '_eris', None)
    if eris is None:
        mycc.ao2mo(mycc.mo_coeff)
        eris = mycc._eris

    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2
    if l1 is None:
        if mycc.l1 is None:
            l1 = t1
        else:
            l1 = mycc.l1
    if l2 is None:
        if mycc.l2 is None:
            l2 = t2
        else:
            l2 = mycc.l2

    t1 = np.zeros_like(t1)
    l1 = np.zeros_like(l1)

    if approx_l:
        mycc.l1 = l1
        mycc.l2 = l2
        conv = True
        return conv, l1, l2

    if fintermediates is None:
        fintermediates = make_intermediates
    if fupdate is None:
        fupdate = update_lambda

    imds = fintermediates(mycc, t1, t2, eris)

    if isinstance(mycc.diis, diis.DistributedDIIS):
        adiis = mycc.diis
    elif mycc.diis:
        adiis = diis.DistributedDIIS(mycc, mycc.diis_file)
        adiis.space = mycc.diis_space
    else:
        adiis = None
    cput1 = log.timer('CCSD lambda initialization', *cput0)

    conv = False
    for istep in range(max_cycle):
        l1new, l2new = fupdate(mycc, t1, t2, l1, l2, eris, imds)
        normt = _diff_norm(mycc, l1new, l2new, l1, l2)
        l1, l2 = l1new, l2new
        l1new = l2new = None
        l1, l2 = mycc.run_diis(l1, l2, istep, normt, 0, adiis)
        log.info('cycle = %d  norm(lambda1,lambda2) = %.6g', istep + 1, normt)
        cput1 = log.timer('CCSD iter', *cput1)
        if normt < tol:
            conv = True
            break

    mycc.l1 = l1
    mycc.l2 = l2
    log.timer('CCSD lambda', *cput0)
    return conv, l1, l2
Example #7
0
def _contract_vvvv_t2(mycc, vvvv, t2T, task_locs, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acbd->ijab', t2, vvvv)
    where vvvv has to be real and has the 4-fold permutation symmetry

    Args:
        vvvv : None or integral object
            if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm
    '''
    time0 = time.clock(), time.time()
    mol = mycc.mol
    log = logger.new_logger(mycc, verbose)

    if callable(t2T):
        t2T = t2T()
    assert (t2T.dtype == numpy.double)
    nvira, nvirb = t2T.shape[:2]
    nvir2 = nvira * nvirb
    t2T = t2T.reshape(nvira, nvirb, -1)
    nocc2 = t2T.shape[2]
    Ht2 = numpy.ndarray(t2T.shape, dtype=t2T.dtype, buffer=out)
    Ht2[:] = 0

    _dgemm = lib.numpy_helper._dgemm

    def contract_blk_(Ht2, t2T, eri, i0, i1, j0, j1):
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[j0:j1] += numpy.einsum('efx,efab->abx', t2T[i0:i1], eri)
        _dgemm('T', 'N', jc * nvirb, nocc2, ic * nvirb,
               eri.reshape(ic * nvirb, jc * nvirb), t2T.reshape(-1, nocc2),
               Ht2.reshape(nvir2, nocc2), 1, 1, 0, i0 * nvirb * nocc2,
               j0 * nvirb * nocc2)

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    if vvvv is None:  # AO-direct CCSD
        ao_loc = mol.ao_loc_nr()
        intor = mol._add_suffix('int2e')
        ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        blksize = max(BLKMIN, numpy.sqrt(max_memory * .9e6 / 8 / nvirb**2 / 2))
        fint = gto.moleintor.getints4c
        fload = ccsd._ccsd.libcc.CCload_eri

        ntasks = mpi.pool.size
        task_sh_locs = task_locs
        sh_ranges_tasks = []
        for task in range(ntasks):
            sh0 = task_sh_locs[task]
            sh1 = task_sh_locs[task + 1]
            sh_ranges = ao2mo.outcore.balance_partition(
                ao_loc, blksize, sh0, sh1)
            sh_ranges_tasks.append(sh_ranges)

        blksize = max(
            max(x[2] for x in sh_ranges) if sh_ranges else 0
            for sh_ranges in sh_ranges_tasks)
        eribuf = numpy.empty((blksize, blksize, nvirb, nvirb))
        loadbuf = numpy.empty((blksize, blksize, nvirb, nvirb))

        ao_sh_ranges = sh_ranges_tasks[rank]
        ao_sh0 = task_sh_locs[rank]
        ao_sh1 = task_sh_locs[rank + 1]
        ao_offset = ao_loc[ao_sh0]
        assert (nvira == ao_loc[ao_sh1] - ao_loc[ao_sh0])

        for task_id, t2T in _rotate_tensor_block(t2T):
            sh_ranges = sh_ranges_tasks[task_id]
            sh0 = task_sh_locs[task_id]
            cur_offset = ao_loc[sh0]

            for ish0, ish1, ni in sh_ranges:
                for jsh0, jsh1, nj in ao_sh_ranges:
                    eri = fint(intor,
                               mol._atm,
                               mol._bas,
                               mol._env,
                               shls_slice=(ish0, ish1, jsh0, jsh1),
                               aosym='s2kl',
                               ao_loc=ao_loc,
                               cintopt=ao2mopt._cintopt,
                               out=eribuf)
                    i0, i1 = ao_loc[ish0] - cur_offset, ao_loc[
                        ish1] - cur_offset
                    j0, j1 = ao_loc[jsh0] - ao_offset, ao_loc[jsh1] - ao_offset
                    tmp = numpy.ndarray((i1 - i0, nvirb, j1 - j0, nvirb),
                                        buffer=loadbuf)
                    fload(tmp.ctypes.data_as(ctypes.c_void_p),
                          eri.ctypes.data_as(ctypes.c_void_p),
                          (ctypes.c_int * 4)(i0, i1, j0, j1),
                          ctypes.c_int(nvirb))
                    contract_blk_(Ht2, t2T, tmp, i0, i1, j0, j1)
                    time0 = log.timer_debug1(
                        'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                        *time0)
    else:
        raise NotImplementedError
    return Ht2
Example #8
0
def _contract_vvvv_t2(mycc, vvvv, t2T, task_locs, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acbd->ijab', t2, vvvv)
    where vvvv has to be real and has the 4-fold permutation symmetry

    Args:
        vvvv : None or integral object
            if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm
    '''
    time0 = time.clock(), time.time()
    mol = mycc.mol
    log = logger.new_logger(mycc, verbose)

    if callable(t2T):
        t2T = t2T()
    assert(t2T.dtype == numpy.double)
    nvira, nvirb = t2T.shape[:2]
    nvir2 = nvira * nvirb
    t2T = t2T.reshape(nvira,nvirb,-1)
    nocc2 = t2T.shape[2]
    Ht2 = numpy.ndarray(t2T.shape, dtype=t2T.dtype, buffer=out)
    Ht2[:] = 0

    _dgemm = lib.numpy_helper._dgemm
    def contract_blk_(Ht2, t2T, eri, i0, i1, j0, j1):
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[j0:j1] += numpy.einsum('efx,efab->abx', t2T[i0:i1], eri)
        _dgemm('T', 'N', jc*nvirb, nocc2, ic*nvirb,
               eri.reshape(ic*nvirb,jc*nvirb), t2T.reshape(-1,nocc2),
               Ht2.reshape(nvir2,nocc2), 1, 1, 0, i0*nvirb*nocc2, j0*nvirb*nocc2)

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    if vvvv is None:   # AO-direct CCSD
        ao_loc = mol.ao_loc_nr()
        intor = mol._add_suffix('int2e')
        ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        blksize = max(BLKMIN, numpy.sqrt(max_memory*.9e6/8/nvirb**2/2))
        fint = gto.moleintor.getints4c
        fload = ccsd._ccsd.libcc.CCload_eri

        ntasks = mpi.pool.size
        task_sh_locs = task_locs
        sh_ranges_tasks = []
        for task in range(ntasks):
            sh0 = task_sh_locs[task]
            sh1 = task_sh_locs[task+1]
            sh_ranges = ao2mo.outcore.balance_partition(ao_loc, blksize, sh0, sh1)
            sh_ranges_tasks.append(sh_ranges)

        blksize = max(max(x[2] for x in sh_ranges) if sh_ranges else 0
                      for sh_ranges in sh_ranges_tasks)
        eribuf = numpy.empty((blksize,blksize,nvirb,nvirb))
        loadbuf = numpy.empty((blksize,blksize,nvirb,nvirb))

        ao_sh_ranges = sh_ranges_tasks[rank]
        ao_sh0 = task_sh_locs[rank]
        ao_sh1 = task_sh_locs[rank+1]
        ao_offset = ao_loc[ao_sh0]
        assert(nvira == ao_loc[ao_sh1] - ao_loc[ao_sh0])

        for task_id, t2T in _rotate_tensor_block(t2T):
            sh_ranges = sh_ranges_tasks[task_id]
            sh0 = task_sh_locs[task_id]
            cur_offset = ao_loc[sh0]

            for ish0, ish1, ni in sh_ranges:
                for jsh0, jsh1, nj in ao_sh_ranges:
                    eri = fint(intor, mol._atm, mol._bas, mol._env,
                               shls_slice=(ish0,ish1,jsh0,jsh1), aosym='s2kl',
                               ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                    i0, i1 = ao_loc[ish0] - cur_offset, ao_loc[ish1] - cur_offset
                    j0, j1 = ao_loc[jsh0] - ao_offset , ao_loc[jsh1] - ao_offset
                    tmp = numpy.ndarray((i1-i0,nvirb,j1-j0,nvirb), buffer=loadbuf)
                    fload(tmp.ctypes.data_as(ctypes.c_void_p),
                          eri.ctypes.data_as(ctypes.c_void_p),
                          (ctypes.c_int*4)(i0, i1, j0, j1),
                          ctypes.c_int(nvirb))
                    contract_blk_(Ht2, t2T, tmp, i0, i1, j0, j1)
                    time0 = log.timer_debug1('AO-vvvv [%d:%d,%d:%d]' %
                                             (ish0,ish1,jsh0,jsh1), *time0)
    else:
        raise NotImplementedError
    return Ht2
Example #9
0
def _make_eris(mp, mo_coeff=None, verbose=None):
    log = logger.new_logger(mp, verbose)
    time0 = (time.clock(), time.time())

    log.debug('transform (ia|jb) outcore')
    mol = mp.mol
    nocc = mp.nocc
    nmo = mp.nmo
    nvir = nmo - nocc

    eris = mp2._ChemistsERIs(mp, mo_coeff)
    nao = eris.mo_coeff.shape[0]
    assert (nvir <= nao)
    orbo = eris.mo_coeff[:, :nocc]
    orbv = numpy.asarray(eris.mo_coeff[:, nocc:], order='F')
    eris.feri = lib.H5TmpFile()

    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    fint = gto.moleintor.getints4c

    ntasks = mpi.pool.size
    olocs = [_task_location(nocc, task_id) for task_id in range(ntasks)]
    oloc0, oloc1 = olocs[rank]
    nocc_seg = oloc1 - oloc0
    log.debug2('olocs %s', olocs)

    ao_loc = mol.ao_loc_nr()
    task_sh_locs = lib.misc._balanced_partition(ao_loc, ntasks)
    log.debug2('task_sh_locs %s', task_sh_locs)
    ao_sh0 = task_sh_locs[rank]
    ao_sh1 = task_sh_locs[rank + 1]
    ao_loc0 = ao_loc[ao_sh0]
    ao_loc1 = ao_loc[ao_sh1]
    nao_seg = ao_loc1 - ao_loc0
    orbo_seg = orbo[ao_loc0:ao_loc1]

    mem_now = lib.current_memory()[0]
    max_memory = max(0, mp.max_memory - mem_now)
    dmax = numpy.sqrt(max_memory * .9e6 / 8 / ((nao + nocc) *
                                               (nao_seg + nocc)))
    dmax = min(nao // 4 + 2, max(BLKMIN, min(comm.allgather(dmax))))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    sh_ranges = comm.bcast(sh_ranges)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao_seg))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc * nocc_seg * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    def save(count, tmp_xo):
        di, dj = tmp_xo.shape[2:4]
        tmp_xo = [tmp_xo[p0:p1] for p0, p1 in olocs]
        tmp_xo = mpi.alltoall(tmp_xo, split_recvbuf=True)
        tmp_xo = sum(tmp_xo).reshape(nocc_seg, nocc, di, dj)
        ftmp[str(count) + 'b'] = tmp_xo

        tmp_ox = mpi.alltoall([tmp_xo[:, p0:p1] for p0, p1 in olocs],
                              split_recvbuf=True)
        tmp_ox = [
            tmp_ox[i].reshape(p1 - p0, nocc_seg, di, dj)
            for i, (p0, p1) in enumerate(olocs)
        ]
        ftmp[str(count) + 'a'] = numpy.vstack(tmp_ox)

    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(save) as bg_save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                shls_slice = (0, mol.nbas, ish0, ish1, jsh0, jsh1, ao_sh0,
                              ao_sh1)
                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=shls_slice,
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_xo = lib.einsum('pi,pqrs->iqrs', orbo, eri)
                tmp_xo = lib.einsum('iqrs,sl->ilqr', tmp_xo, orbo_seg)
                bg_save(count, tmp_xo)
                tmp_xo = None
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    eri = eribuf = None
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)

    eris.ovov = eris.feri.create_dataset('ovov', (nocc, nvir, nocc_seg, nvir),
                                         'f8')
    occblk = int(
        min(nocc,
            max(BLKMIN, max_memory * .9e6 / 8 / (nao**2 * nocc_seg + 1) / 5)))

    def load(i0, eri):
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(jk_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = ftmp[str(k) + 'a'][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k) + 'b'][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def save(i0, i1, dat):
        eris.ovov[i0:i1] = dat

    buf_prefecth = numpy.empty((occblk, nocc_seg, nao, nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk * nocc_seg, nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocc_seg, nao, nao)

                dat = _ao2mo.nr_e2(eri,
                                   orbv, (0, nvir, 0, nvir),
                                   's1',
                                   's1',
                                   out=bufw)
                bsave(
                    i0, i1,
                    dat.reshape(i1 - i0, nocc_seg, nvir,
                                nvir).transpose(0, 2, 1, 3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    mp._eris = eris
    return eris
Example #10
0
def _make_eris(mp, mo_coeff=None, verbose=None):
    log = logger.new_logger(mp, verbose)
    time0 = (time.clock(), time.time())

    log.debug('transform (ia|jb) outcore')
    mol = mp.mol
    nocc = mp.nocc
    nmo = mp.nmo
    nvir = nmo - nocc

    eris = mp2._ChemistsERIs(mp, mo_coeff)
    nao = eris.mo_coeff.shape[0]
    assert(nvir <= nao)
    orbo = eris.mo_coeff[:,:nocc]
    orbv = numpy.asarray(eris.mo_coeff[:,nocc:], order='F')
    eris.feri = lib.H5TmpFile()

    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    fint = gto.moleintor.getints4c

    ntasks = mpi.pool.size
    olocs = [_task_location(nocc, task_id) for task_id in range(ntasks)]
    oloc0, oloc1 = olocs[rank]
    nocc_seg = oloc1 - oloc0
    log.debug2('olocs %s', olocs)

    ao_loc = mol.ao_loc_nr()
    task_sh_locs = lib.misc._balanced_partition(ao_loc, ntasks)
    log.debug2('task_sh_locs %s', task_sh_locs)
    ao_sh0 = task_sh_locs[rank]
    ao_sh1 = task_sh_locs[rank+1]
    ao_loc0 = ao_loc[ao_sh0]
    ao_loc1 = ao_loc[ao_sh1]
    nao_seg = ao_loc1 - ao_loc0
    orbo_seg = orbo[ao_loc0:ao_loc1]

    mem_now = lib.current_memory()[0]
    max_memory = max(0, mp.max_memory - mem_now)
    dmax = numpy.sqrt(max_memory*.9e6/8/((nao+nocc)*(nao_seg+nocc)))
    dmax = min(nao//4+2, max(BLKMIN, min(comm.allgather(dmax))))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    sh_ranges = comm.bcast(sh_ranges)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao,dmax,dmax,nao_seg))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, nocc*nocc_seg*(nao*(nao+dmax)/2+nvir**2)*8/1e6)

    def save(count, tmp_xo):
        di, dj = tmp_xo.shape[2:4]
        tmp_xo = [tmp_xo[p0:p1] for p0, p1 in olocs]
        tmp_xo = mpi.alltoall(tmp_xo, split_recvbuf=True)
        tmp_xo = sum(tmp_xo).reshape(nocc_seg,nocc,di,dj)
        ftmp[str(count)+'b'] = tmp_xo

        tmp_ox = mpi.alltoall([tmp_xo[:,p0:p1] for p0, p1 in olocs],
                              split_recvbuf=True)
        tmp_ox = [tmp_ox[i].reshape(p1-p0,nocc_seg,di,dj)
                  for i, (p0,p1) in enumerate(olocs)]
        ftmp[str(count)+'a'] = numpy.vstack(tmp_ox)

    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(save) as bg_save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip+1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0,i1,j0,j1))

                shls_slice = (0,mol.nbas,ish0,ish1, jsh0,jsh1,ao_sh0,ao_sh1)
                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=shls_slice, aosym='s1', ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt, out=eribuf)
                tmp_xo = lib.einsum('pi,pqrs->iqrs', orbo, eri)
                tmp_xo = lib.einsum('iqrs,sl->ilqr', tmp_xo, orbo_seg)
                bg_save(count, tmp_xo)
                tmp_xo = None
                count += 1
                time1 = log.timer_debug1('partial ao2mo [%d:%d,%d:%d]' %
                                         (ish0,ish1,jsh0,jsh1), *time1)
    eri = eribuf = None
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)

    eris.ovov = eris.feri.create_dataset('ovov', (nocc,nvir,nocc_seg,nvir), 'f8')
    occblk = int(min(nocc, max(BLKMIN, max_memory*.9e6/8/(nao**2*nocc_seg+1)/5)))
    def load(i0, eri):
        if i0 < nocc:
            i1 = min(i0+occblk, nocc)
            for k, (p0,p1,q0,q1) in enumerate(jk_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = ftmp[str(k)+'a'][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k)+'b'][:,i0:i1])
                    eri[:i1-i0,:,q0:q1,p0:p1] = dat.transpose(1,0,3,2)

    def save(i0, i1, dat):
        eris.ovov[i0:i1] = dat

    buf_prefecth = numpy.empty((occblk,nocc_seg,nao,nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk*nocc_seg,nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1-i0].reshape((i1-i0)*nocc_seg,nao,nao)

                dat = _ao2mo.nr_e2(eri, orbv, (0,nvir,0,nvir), 's1', 's1', out=bufw)
                bsave(i0, i1, dat.reshape(i1-i0,nocc_seg,nvir,nvir).transpose(0,2,1,3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0,i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    mp._eris = eris
    return eris