Ejemplo n.º 1
0
    def start(self, interval=0.02):
        """Stage this rank's ``vvop`` integrals on disk and start the tensor server.

        The method first fills a ``'vvop'`` dataset of shape
        ``(nvir_seg, nvir, nocc, nmo)`` inside a fresh ``lib.H5TmpFile``
        (kept on ``self.eri_tmp``), combining ``eris.ovov`` with the
        ``eris.vvvo`` pieces exchanged between ranks.  It then launches a
        thread that answers ``INQUIRY`` messages with the tensors returned
        by ``self._get_tensor`` until a ``'Done'`` task is received.

        Args:
            interval: Seconds the server thread sleeps between polls for
                incoming ``INQUIRY`` messages.

        Returns:
            The already-started ``threading.Thread`` running the server loop.
        """
        cc = self._cc
        log = logger.new_logger(cc)
        tick = (logger.process_clock(), logger.perf_counter())
        eris = cc._eris
        # Only the dtype of the transposed amplitudes is needed below.
        amp_dtype = cc.t2.transpose(2, 3, 0, 1).dtype

        nocc, nvir = cc.t1.shape
        nmo = nocc + nvir
        seg_start, seg_stop = self.vranges[rank]
        seg_len = seg_stop - seg_start

        avail_mem = min(24000, cc.max_memory - lib.current_memory()[0])
        mem_rows = int(avail_mem * .3e6 / 8 / (nvir * nocc * nmo))
        step = min(seg_len // 4 + 1, max(16, mem_rows))
        self.eri_tmp = lib.H5TmpFile()
        vvop = self.eri_tmp.create_dataset(
            'vvop', (seg_len, nvir, nocc, nmo), 'f8')

        def save_vvop(j0, j1, vvvo):
            # Assemble rows j0:j1 of vvop in memory, then write them at once.
            width = j1 - j0
            staged = numpy.empty((width, nvir, nocc, nmo), dtype=amp_dtype)
            ovov_conj = eris.ovov[:, j0:j1].conj()
            staged[:, :, :, :nocc] = ovov_conj.transpose(1, 3, 0, 2)
            for idx, bounds in enumerate(self.vranges):
                lo, hi = bounds
                piece = vvvo[idx].reshape(hi - lo, nvir, width, nocc)
                staged[:, lo:hi, :, nocc:] = piece.transpose(2, 0, 3, 1)
            vvop[j0:j1] = staged

        with lib.call_in_background(save_vvop) as save_async:
            for p0, p1 in mpi.prange(seg_start, seg_stop, step):
                # Every rank learns which virtual rows the others handle now.
                all_bounds = comm.allgather((p0, p1))
                outgoing = [eris.vvvo[:, :, q0:q1] for q0, q1 in all_bounds]
                received = mpi.alltoall_new(outgoing, split_recvbuf=True)
                save_async(p0 - seg_start, p1 - seg_start, received)
                tick = log.timer_debug1('transpose %d:%d' % (p0, p1), *tick)

        def send_data():
            # Poll for requests; drain all pending ones before sleeping again.
            while True:
                while comm.Iprobe(source=MPI.ANY_SOURCE, tag=INQUIRY):
                    requests, requester = comm.recv(source=MPI.ANY_SOURCE,
                                                    tag=INQUIRY)
                    for task, slices in requests:
                        if task == 'Done':
                            return
                        payload = self._get_tensor(task, slices)
                        mpi.send(payload, requester, tag=TRANSFER_DATA)
                time.sleep(interval)

        server = threading.Thread(target=send_data)
        server.start()
        return server
Ejemplo n.º 2
0
    def start(self, interval=0.02):
        """Write the local ``vvop`` integrals to disk and start serving tensors.

        A ``'vvop'`` dataset of shape ``(nvir_seg, nvir, nocc, nmo)`` is
        created in a new ``lib.H5TmpFile`` (stored on ``self.eri_tmp``) and
        filled from ``eris.ovov`` and the ``eris.vvvo`` blocks exchanged with
        the other ranks.  Afterwards a thread is started that replies to
        ``INQUIRY`` messages with ``self._get_tensor(task, slices)`` until it
        receives a ``'Done'`` task.

        Args:
            interval: Seconds to sleep between polls for ``INQUIRY`` messages.

        Returns:
            The started ``threading.Thread`` running the reply loop.
        """
        mycc = self._cc
        log = logger.new_logger(mycc)
        # time.clock() was removed in Python 3.8; use the logger's clocks so
        # the start stamps match what log.timer_debug1 measures against.
        cpu1 = (logger.process_clock(), logger.perf_counter())
        eris = mycc._eris
        t2T = mycc.t2.transpose(2,3,0,1)

        nocc, nvir = mycc.t1.shape
        nmo = nocc + nvir
        vloc0, vloc1 = self.vranges[rank]
        nvir_seg = vloc1 - vloc0

        max_memory = min(24000, mycc.max_memory - lib.current_memory()[0])
        blksize = min(nvir_seg//4+1, max(16, int(max_memory*.3e6/8/(nvir*nocc*nmo))))
        self.eri_tmp = lib.H5TmpFile()
        vvop = self.eri_tmp.create_dataset('vvop', (nvir_seg,nvir,nocc,nmo), 'f8')

        def save_vvop(j0, j1, vvvo):
            # Build rows j0:j1 in memory: the first nocc columns of the last
            # axis come from ovov, the remaining ones from the received vvvo.
            buf = numpy.empty((j1-j0,nvir,nocc,nmo), dtype=t2T.dtype)
            buf[:,:,:,:nocc] = eris.ovov[:,j0:j1].conj().transpose(1,3,0,2)
            for k, (q0, q1) in enumerate(self.vranges):
                blk = vvvo[k].reshape(q1-q0,nvir,j1-j0,nocc)
                buf[:,q0:q1,:,nocc:] = blk.transpose(2,0,3,1)
            vvop[j0:j1] = buf

        with lib.call_in_background(save_vvop) as save_vvop:
            for p0, p1 in mpi.prange(vloc0, vloc1, blksize):
                j0, j1 = p0 - vloc0, p1 - vloc0
                # Share the current row range of every rank before exchanging.
                sub_locs = comm.allgather((p0,p1))
                vvvo = mpi.alltoall([eris.vvvo[:,:,q0:q1] for q0, q1 in sub_locs],
                                    split_recvbuf=True)
                save_vvop(j0, j1, vvvo)
                cpu1 = log.timer_debug1('transpose %d:%d'%(p0,p1), *cpu1)

        def send_data():
            # Drain every pending request, then sleep before polling again.
            while True:
                while comm.Iprobe(source=MPI.ANY_SOURCE, tag=INQUIRY):
                    tensors, dest = comm.recv(source=MPI.ANY_SOURCE, tag=INQUIRY)
                    for task, slices in tensors:
                        if task == 'Done':
                            return
                        else:
                            mpi.send(self._get_tensor(task, slices), dest,
                                     tag=TRANSFER_DATA)
                time.sleep(interval)

        daemon = threading.Thread(target=send_data)
        daemon.start()
        return daemon
Ejemplo n.º 3
0
def _make_eris_outcore(mycc, mo_coeff=None):
    """Transform and store this rank's share of the CCSD integrals out of core.

    Rank 0 runs ``eris._common_init_`` and broadcasts ``mo_coeff``, ``fock``,
    ``nocc`` and ``mo_energy``.  Every rank then creates the ``oooo``,
    ``oovv``, ``ovoo``, ``ovvo``, ``ovov`` and ``vvvo`` datasets in a
    temporary HDF5 file (``eris.feri1``); one virtual axis of each dataset
    except ``oooo`` is limited to the ``vloc0:vloc1`` range that
    ``_task_location`` returns for the rank.

    Args:
        mycc: CC object.  Its ``mol``, ``stdout``, ``verbose``,
            ``max_memory`` and ``direct`` attributes are read, and
            ``mycc.direct`` must be true.
        mo_coeff: Passed to ``eris._common_init_`` on rank 0.

    Returns:
        The populated ERI container; it is also stored as ``mycc._eris``.

    Raises:
        AssertionError: If ``mycc.direct`` is false.
    """
    # time.clock() was removed in Python 3.8; the logger clocks give start
    # stamps compatible with log.timer / log.timer_debug1.
    cput0 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mycc.stdout, mycc.verbose)
    _sync_(mycc)
    eris = ccsd._ChemistsERIs()
    if rank == 0:
        eris._common_init_(mycc, mo_coeff)
        comm.bcast((eris.mo_coeff, eris.fock, eris.nocc, eris.mo_energy))
    else:
        eris.mol = mycc.mol
        eris.mo_coeff, eris.fock, eris.nocc, eris.mo_energy = comm.bcast(None)

    mol = mycc.mol
    mo_coeff = numpy.asarray(eris.mo_coeff, order='F')
    nocc = eris.nocc
    nao, nmo = mo_coeff.shape
    nvir = nmo - nocc
    orbo = mo_coeff[:, :nocc]
    orbv = mo_coeff[:, nocc:]
    # One (start, stop) virtual-orbital range per task; ours is vlocs[rank].
    vlocs = [_task_location(nvir, task_id) for task_id in range(mpi.pool.size)]
    vloc0, vloc1 = vlocs[rank]
    vseg = vloc1 - vloc0

    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc, nocc, nocc, nocc),
                                          'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc, nocc, vseg, nvir),
                                          'f8',
                                          chunks=(nocc, nocc, 1, nvir))
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc, vseg, nocc, nocc),
                                          'f8',
                                          chunks=(nocc, 1, nocc, nocc))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc, vseg, nvir, nocc),
                                          'f8',
                                          chunks=(nocc, 1, nvir, nocc))
    eris.ovov = eris.feri1.create_dataset('ovov', (nocc, vseg, nocc, nvir),
                                          'f8',
                                          chunks=(nocc, 1, nocc, nvir))
    # NOTE: no packed 'ovvv' dataset is created; the 'vvvo' dataset below is
    # filled instead.
    eris.vvvo = eris.feri1.create_dataset('vvvo', (vseg, nvir, nvir, nocc),
                                          'f8',
                                          chunks=(1, nvir, 1, nocc))
    assert (mycc.direct)

    def save_occ_frac(p0, p1, eri):
        # Rows p0:p1 index occupied orbitals; keep the oo and local-v/v parts.
        eri = eri.reshape(p1 - p0, nocc, nmo, nmo)
        eris.oooo[p0:p1] = eri[:, :, :nocc, :nocc]
        eris.oovv[p0:p1] = eri[:, :, nocc + vloc0:nocc + vloc1, nocc:]

    def save_vir_frac(p0, p1, eri):
        # Rows p0:p1 index this rank's local virtual orbitals.
        log.alldebug1('save_vir_frac %d %d %s', p0, p1, eri.shape)
        eri = eri.reshape(p1 - p0, nocc, nmo, nmo)
        eris.ovoo[:, p0:p1] = eri[:, :, :nocc, :nocc].transpose(1, 0, 2, 3)
        eris.ovvo[:, p0:p1] = eri[:, :, nocc:, :nocc].transpose(1, 0, 2, 3)
        eris.ovov[:, p0:p1] = eri[:, :, :nocc, nocc:].transpose(1, 0, 2, 3)

        cput2 = (logger.process_clock(), logger.perf_counter())
        # Slice the vv part by every rank's virtual range and exchange the
        # slices so each rank receives the pieces for its own range.
        ovvv_segs = [
            eri[:, :, nocc + q0:nocc + q1, nocc:].transpose(2, 3, 0, 1)
            for q0, q1 in vlocs
        ]
        ovvv_segs = mpi.alltoall(ovvv_segs, split_recvbuf=True)
        log.timer_debug1('vvvo alltoall', *cput2)
        for task_id, (q0, q1) in enumerate(comm.allgather((p0, p1))):
            # (q0, q1) are offsets local to the sender; shift them by the
            # start of the sender's virtual range.
            ip0 = q0 + vlocs[task_id][0]
            ip1 = q1 + vlocs[task_id][0]
            eris.vvvo[:, :, ip0:ip1] = ovvv_segs[task_id].reshape(
                vseg, nvir, q1 - q0, nocc)

    fswap = lib.H5TmpFile()
    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    int2e = mol._add_suffix('int2e')
    orbov = numpy.hstack((orbo, orbv[:, vloc0:vloc1]))
    ao2mo.outcore.half_e1(mol, (orbov, orbo),
                          fswap,
                          int2e,
                          's4',
                          1,
                          max_memory,
                          verbose=log)

    ao_loc = mol.ao_loc_nr()
    nao_pair = nao * (nao + 1) // 2
    blksize = int(min(8e9, max_memory * .5e6) / 8 / (nao_pair + nmo**2) / nocc)
    blksize = min(nvir, max(BLKMIN, blksize))
    fload = ao2mo.outcore._load_from_h5g

    # Two buffers are swapped each iteration: one is transformed while the
    # other is being filled in the background.
    buf = numpy.empty((blksize * nocc, nao_pair))
    buf_prefetch = numpy.empty_like(buf)

    def prefetch(p0, p1, rowmax):
        # Load the block that follows [p0, p1), clipped at rowmax.
        p0, p1 = p1, min(rowmax, p1 + blksize)
        if p0 < p1:
            fload(fswap['0'], p0 * nocc, p1 * nocc, buf_prefetch)

    cput1 = (logger.process_clock(), logger.perf_counter())
    outbuf = numpy.empty((blksize * nocc, nmo**2))
    with lib.call_in_background(prefetch) as bprefetch:
        fload(fswap['0'], 0, min(nocc, blksize) * nocc, buf_prefetch)
        for p0, p1 in lib.prange(0, nocc, blksize):
            nrow = (p1 - p0) * nocc
            buf, buf_prefetch = buf_prefetch, buf
            bprefetch(p0, p1, nocc)
            dat = ao2mo._ao2mo.nr_e2(buf[:nrow],
                                     mo_coeff, (0, nmo, 0, nmo),
                                     's4',
                                     's1',
                                     out=outbuf,
                                     ao_loc=ao_loc)
            save_occ_frac(p0, p1, dat)

        # All ranks must step with the same block size because
        # save_vir_frac performs collective communication.
        blksize = min(comm.allgather(blksize))
        norb_max = nocc + vseg
        fload(fswap['0'], nocc**2,
              min(nocc + blksize, norb_max) * nocc, buf_prefetch)
        for p0, p1 in mpi.prange(vloc0, vloc1, blksize):
            i0, i1 = p0 - vloc0, p1 - vloc0
            nrow = (p1 - p0) * nocc
            buf, buf_prefetch = buf_prefetch, buf
            bprefetch(nocc + i0, nocc + i1, norb_max)
            dat = ao2mo._ao2mo.nr_e2(buf[:nrow],
                                     mo_coeff, (0, nmo, 0, nmo),
                                     's4',
                                     's1',
                                     out=outbuf,
                                     ao_loc=ao_loc)
            save_vir_frac(i0, i1, dat)
    # Drop the work buffers so their memory can be reclaimed.
    buf = buf_prefetch = outbuf = None

    log.timer_debug1('transforming oppp', *cput1)
    log.timer('CCSD integral transformation', *cput0)
    mycc._eris = eris
    return eris
Ejemplo n.º 4
0
def _make_eris_outcore(mycc, mo_coeff=None):
    """Build this rank's CCSD integral blocks and keep them in a temp HDF5 file.

    On rank 0 ``eris._common_init_`` is called and ``mo_coeff``, ``fock``,
    ``nocc`` and ``mo_energy`` are broadcast; the other ranks receive them.
    Each rank then fills the ``oooo``, ``oovv``, ``ovoo``, ``ovvo``, ``ovov``
    and ``vvvo`` datasets of ``eris.feri1``.  Apart from ``oooo``, one
    virtual axis of every dataset covers only the ``vloc0:vloc1`` range
    given by ``_task_location`` for the rank.

    Args:
        mycc: CC object; ``mol``, ``stdout``, ``verbose``, ``max_memory`` and
            ``direct`` are read from it.  ``mycc.direct`` must be true.
        mo_coeff: Forwarded to ``eris._common_init_`` on rank 0.

    Returns:
        The filled ERI container, which is also assigned to ``mycc._eris``.

    Raises:
        AssertionError: If ``mycc.direct`` is false.
    """
    # time.clock() no longer exists since Python 3.8; take the start stamps
    # from the logger clocks that log.timer / log.timer_debug1 work with.
    cput0 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mycc.stdout, mycc.verbose)
    _sync_(mycc)
    eris = ccsd._ChemistsERIs()
    if rank == 0:
        eris._common_init_(mycc, mo_coeff)
        comm.bcast((eris.mo_coeff, eris.fock, eris.nocc, eris.mo_energy))
    else:
        eris.mol = mycc.mol
        eris.mo_coeff, eris.fock, eris.nocc, eris.mo_energy = comm.bcast(None)

    mol = mycc.mol
    mo_coeff = numpy.asarray(eris.mo_coeff, order='F')
    nocc = eris.nocc
    nao, nmo = mo_coeff.shape
    nvir = nmo - nocc
    orbo = mo_coeff[:,:nocc]
    orbv = mo_coeff[:,nocc:]
    # (start, stop) virtual-orbital range of every task; vlocs[rank] is ours.
    vlocs = [_task_location(nvir, task_id) for task_id in range(mpi.pool.size)]
    vloc0, vloc1 = vlocs[rank]
    vseg = vloc1 - vloc0

    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc,nocc,vseg,nvir), 'f8', chunks=(nocc,nocc,1,nvir))
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc,vseg,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc,vseg,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc))
    eris.ovov = eris.feri1.create_dataset('ovov', (nocc,vseg,nocc,nvir), 'f8', chunks=(nocc,1,nocc,nvir))
    # NOTE: a packed 'ovvv' dataset is not created; 'vvvo' is stored instead.
    eris.vvvo = eris.feri1.create_dataset('vvvo', (vseg,nvir,nvir,nocc), 'f8', chunks=(1,nvir,1,nocc))
    assert(mycc.direct)

    def save_occ_frac(p0, p1, eri):
        # p0:p1 are occupied-orbital rows; store the oo and local-v/v parts.
        eri = eri.reshape(p1-p0,nocc,nmo,nmo)
        eris.oooo[p0:p1] = eri[:,:,:nocc,:nocc]
        eris.oovv[p0:p1] = eri[:,:,nocc+vloc0:nocc+vloc1,nocc:]

    def save_vir_frac(p0, p1, eri):
        # p0:p1 are rows within this rank's local virtual segment.
        log.alldebug1('save_vir_frac %d %d %s', p0, p1, eri.shape)
        eri = eri.reshape(p1-p0,nocc,nmo,nmo)
        eris.ovoo[:,p0:p1] = eri[:,:,:nocc,:nocc].transpose(1,0,2,3)
        eris.ovvo[:,p0:p1] = eri[:,:,nocc:,:nocc].transpose(1,0,2,3)
        eris.ovov[:,p0:p1] = eri[:,:,:nocc,nocc:].transpose(1,0,2,3)

        cput2 = (logger.process_clock(), logger.perf_counter())
        # Cut the vv part along every rank's virtual range and exchange the
        # pieces, so each rank ends up with the slices for its own range.
        ovvv_segs = [eri[:,:,nocc+q0:nocc+q1,nocc:].transpose(2,3,0,1) for q0,q1 in vlocs]
        ovvv_segs = mpi.alltoall(ovvv_segs, split_recvbuf=True)
        log.timer_debug1('vvvo alltoall', *cput2)
        for task_id, (q0, q1) in enumerate(comm.allgather((p0,p1))):
            # q0/q1 are relative to the sender's segment; add its start.
            ip0 = q0 + vlocs[task_id][0]
            ip1 = q1 + vlocs[task_id][0]
            eris.vvvo[:,:,ip0:ip1] = ovvv_segs[task_id].reshape(vseg,nvir,q1-q0,nocc)

    fswap = lib.H5TmpFile()
    max_memory = max(MEMORYMIN, mycc.max_memory-lib.current_memory()[0])
    int2e = mol._add_suffix('int2e')
    orbov = numpy.hstack((orbo, orbv[:,vloc0:vloc1]))
    ao2mo.outcore.half_e1(mol, (orbov,orbo), fswap, int2e,
                          's4', 1, max_memory, verbose=log)

    ao_loc = mol.ao_loc_nr()
    nao_pair = nao * (nao+1) // 2
    blksize = int(min(8e9,max_memory*.5e6)/8/(nao_pair+nmo**2)/nocc)
    blksize = min(nvir, max(BLKMIN, blksize))
    fload = ao2mo.outcore._load_from_h5g

    # buf and buf_prefetch are swapped every iteration: one is transformed
    # while the background call fills the other.
    buf = numpy.empty((blksize*nocc,nao_pair))
    buf_prefetch = numpy.empty_like(buf)
    def prefetch(p0, p1, rowmax):
        # Load the block following [p0, p1), clipped at rowmax.
        p0, p1 = p1, min(rowmax, p1+blksize)
        if p0 < p1:
            fload(fswap['0'], p0*nocc, p1*nocc, buf_prefetch)

    cput1 = (logger.process_clock(), logger.perf_counter())
    outbuf = numpy.empty((blksize*nocc,nmo**2))
    with lib.call_in_background(prefetch) as bprefetch:
        fload(fswap['0'], 0, min(nocc,blksize)*nocc, buf_prefetch)
        for p0, p1 in lib.prange(0, nocc, blksize):
            nrow = (p1 - p0) * nocc
            buf, buf_prefetch = buf_prefetch, buf
            bprefetch(p0, p1, nocc)
            dat = ao2mo._ao2mo.nr_e2(buf[:nrow], mo_coeff, (0,nmo,0,nmo),
                                     's4', 's1', out=outbuf, ao_loc=ao_loc)
            save_occ_frac(p0, p1, dat)

        # save_vir_frac communicates collectively, so every rank has to
        # iterate with the same (smallest) block size.
        blksize = min(comm.allgather(blksize))
        norb_max = nocc + vseg
        fload(fswap['0'], nocc**2, min(nocc+blksize,norb_max)*nocc, buf_prefetch)
        for p0, p1 in mpi.prange(vloc0, vloc1, blksize):
            i0, i1 = p0 - vloc0, p1 - vloc0
            nrow = (p1 - p0) * nocc
            buf, buf_prefetch = buf_prefetch, buf
            bprefetch(nocc+i0, nocc+i1, norb_max)
            dat = ao2mo._ao2mo.nr_e2(buf[:nrow], mo_coeff, (0,nmo,0,nmo),
                                     's4', 's1', out=outbuf, ao_loc=ao_loc)
            save_vir_frac(i0, i1, dat)
    # Release the work buffers.
    buf = buf_prefetch = outbuf = None

    log.timer_debug1('transforming oppp', *cput1)
    log.timer('CCSD integral transformation', *cput0)
    mycc._eris = eris
    return eris