Esempio n. 1
0
    def test_transpose_sum(self):
        """Compare lib.hermi_sum / lib.transpose_sum with plain numpy sums.

        Covers real and complex stacks of square matrices (with and without
        ``inplace=True``, and with ``hermi=3``), a real 2D matrix and an
        int32 matrix whose dtype must survive ``lib.transpose_sum``.
        """
        perm = (0, 2, 1)

        # --- real input -------------------------------------------------
        real = numpy.random.random((3, 400, 400))
        expected = real[0] + real[0].T
        self.assertAlmostEqual(
            abs(expected - lib.hermi_sum(real[0])).max(), 0, 12)
        expected = real + real.transpose(perm)
        self.assertAlmostEqual(
            abs(expected - lib.hermi_sum(real, perm)).max(), 0, 12)
        # The reference is built first because the in-place call may
        # overwrite its input.
        expected = real + real.transpose(perm)
        result = lib.hermi_sum(real, perm, inplace=True)
        self.assertAlmostEqual(abs(expected - result).max(), 0, 12)

        # --- complex input ----------------------------------------------
        re_part = numpy.random.random((3, 400, 400))
        im_part = numpy.random.random((3, 400, 400))
        cplx = re_part + im_part * 1j
        expected = cplx[0] + cplx[0].T.conj()
        self.assertAlmostEqual(
            abs(expected - lib.hermi_sum(cplx[0])).max(), 0, 12)
        expected = cplx + cplx.transpose(perm).conj()
        self.assertAlmostEqual(
            abs(expected - lib.hermi_sum(cplx, perm)).max(), 0, 12)
        # With hermi=3 the expected result is the sum with the plain
        # (non-conjugated) transpose.
        expected = cplx + cplx.transpose(perm)
        self.assertAlmostEqual(
            abs(expected - lib.hermi_sum(cplx, perm, hermi=3)).max(), 0, 12)
        expected = cplx + cplx.transpose(perm).conj()
        result = lib.hermi_sum(cplx, perm, inplace=True)
        self.assertAlmostEqual(abs(expected - result).max(), 0, 12)

        # --- 2D real matrix through transpose_sum -----------------------
        mat = numpy.random.random((400, 400))
        expected = mat + mat.T.conj()
        result = lib.transpose_sum(mat)
        self.assertAlmostEqual(abs(expected - result).max(), 0, 12)

        # --- integer matrix: the dtype must be preserved ----------------
        mat = (mat * 1000).astype(numpy.int32)
        expected = mat + mat.T
        result = lib.transpose_sum(mat)
        self.assertAlmostEqual(abs(expected - result).max(), 0, 12)
        self.assertTrue(result.dtype == numpy.int32)
Esempio n. 2
0
def _check_(c):
    """Symmetrize ``c`` and verify that the result has unit norm.

    ``c`` is passed to ``lib.transpose_sum(..., inplace=True)`` and the
    result is halved in place, so the caller's buffer is presumably
    overwritten (depends on ``lib.transpose_sum`` -- confirm).

    Returns:
        The symmetrized array divided by its norm.

    Raises:
        ValueError: if the norm of the symmetrized array differs from 1 by
            more than 1e-6.
    """
    sym = lib.transpose_sum(c, inplace=True)
    sym *= .5
    nrm = numpy.linalg.norm(sym)
    deviation = abs(nrm - 1)
    if deviation > 1e-6:
        raise ValueError('State not singlet %g' % deviation)
    return sym / nrm
Esempio n. 3
0
def _check_(c):
    """Return the normalized transpose-average of ``c``.

    The array is symmetrized with ``lib.transpose_sum(c, inplace=True)``
    followed by an in-place scaling by 0.5.

    Raises:
        ValueError: when the symmetrized array's norm is more than 1e-6
            away from 1.
    """
    averaged = lib.transpose_sum(c, inplace=True)
    averaged *= .5
    length = numpy.linalg.norm(averaged)
    off_by = abs(length - 1)
    if off_by > 1e-6:
        raise ValueError('State not singlet %g' % off_by)
    return averaged / length
Esempio n. 4
0
def part_eri_hermi(eri, norb, nimp):
    """Build a symmetrized integral array with rows ``nimp:`` zeroed.

    Steps, as performed by the code:
      1. restore ``eri`` with ``ao2mo.restore(4, ...)``;
      2. for every row, unpack it to a square matrix, zero the rows from
         ``nimp`` onward, add the transpose and pack it back;
      3. transpose-sum the whole array in place;
      4. restore with ``ao2mo.restore(8, ...)`` and scale by 0.25.

    NOTE(review): if ``ao2mo.restore(4, ...)`` returns its input unchanged,
    the caller's array is modified -- confirm.
    """
    eri_s4 = ao2mo.restore(4, eri, norb)
    nrow = eri_s4.shape[0]
    for row in range(nrow):
        square = lib.unpack_tril(eri_s4[row])
        square[nimp:] = 0
        eri_s4[row] = lib.pack_tril(square + square.T)
    eri_s4 = lib.transpose_sum(eri_s4, inplace=True)
    eri_s8 = ao2mo.restore(8, eri_s4, norb)
    return eri_s8 * 0.25
Esempio n. 5
0
def contract_2e(eri, fcivec, norb, nelec, link_index=None):
    """Contract the two-electron integrals with a CI vector (spin-0 kernel).

    Args:
        eri: two-electron integrals, converted with ``ao2mo.restore(4, ...)``.
        fcivec: CI coefficients; must hold ``na**2`` elements.
        norb: number of orbitals.
        nelec: electron count, forwarded to ``_unpack``.
        link_index: optional link table, forwarded to ``_unpack``.

    Returns:
        The result of ``FCIcontract_2e_spin0`` after a transpose sum,
        reshaped to the shape of ``fcivec``.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    civec = numpy.asarray(fcivec, order='C')
    # The integrals are replaced by half of (eri + eri^T), in place.
    eri = ao2mo.restore(4, eri, norb)
    lib.transpose_sum(eri, inplace=True)
    eri *= .5
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert civec.size == na**2
    ci1 = numpy.empty((na, na))

    libfci.FCIcontract_2e_spin0(_ptr(eri), _ptr(civec), _ptr(ci1),
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), _ptr(link_index))
    # no *.5 because FCIcontract_2e_spin0 only computes half of the
    # contraction
    return lib.transpose_sum(ci1, inplace=True).reshape(civec.shape)
Esempio n. 6
0
def contract_2e(eri, fcivec, norb, nelec, link_index=None):
    """Spin-0 two-electron contraction of ``fcivec`` with ``eri``.

    The integrals are restored with ``ao2mo.restore(4, ...)`` and averaged
    with their transpose in place before being handed to the C kernel
    ``FCIcontract_2e_spin0``.  The kernel output is transpose-summed and
    returned in the shape of ``fcivec``.
    """
    void_p = ctypes.c_void_p

    civec = numpy.asarray(fcivec, order='C')
    eri = ao2mo.restore(4, eri, norb)
    lib.transpose_sum(eri, inplace=True)
    eri *= .5
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert civec.size == na**2
    out = numpy.empty((na, na))

    eri_p = eri.ctypes.data_as(void_p)
    civec_p = civec.ctypes.data_as(void_p)
    out_p = out.ctypes.data_as(void_p)
    link_p = link_index.ctypes.data_as(void_p)
    libfci.FCIcontract_2e_spin0(eri_p, civec_p, out_p,
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), link_p)
    # no *.5 because FCIcontract_2e_spin0 only computes half of the
    # contraction
    return lib.transpose_sum(out, inplace=True).reshape(civec.shape)
Esempio n. 7
0
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Subtract ``rdm1`` from the ``[:, k, k, :]`` slices of ``rdm2`` and
    average the result with its pair-transpose.

    Args:
        rdm1: square array; its first dimension defines ``nmo``.
        rdm2: 4-index array of shape ``(nmo, nmo, nmo, nmo)``.
        inplace: when false, ``rdm2`` is copied first; when true, the
            caller's ``rdm2`` is modified by the intermediate steps.

    Returns:
        ``(rdm1, rdm2_new)`` where ``rdm2_new`` is a new array of shape
        ``(nmo, nmo, nmo, nmo)``.
    """
    nmo = rdm1.shape[0]
    work = rdm2 if inplace else rdm2.copy()
    for k in range(nmo):
        work[:, k, k, :] -= rdm1
    # View as an (nmo*nmo, nmo*nmo) matrix, add its transpose, then halve.
    as_matrix = work.reshape(nmo * nmo, -1)
    averaged = lib.transpose_sum(as_matrix, inplace=True) * .5
    return rdm1, averaged.reshape((nmo,) * 4)
Esempio n. 8
0
File: rdm.py Progetto: sunqm/pyscf
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Subtract ``rdm1.T`` from every ``rdm2[:, k, k, :]`` slice, then
    symmetrize ``rdm2`` as an ``(nmo*nmo, nmo*nmo)`` matrix.

    Args:
        rdm1: square array whose first dimension gives ``nmo``.
        rdm2: array of shape ``(nmo, nmo, nmo, nmo)``.
        inplace: if false a copy of ``rdm2`` is used for the intermediate
            in-place operations; if true ``rdm2`` itself is modified.

    Returns:
        ``(rdm1, new_rdm2)``; ``new_rdm2`` is a freshly allocated array.
    """
    nmo = rdm1.shape[0]
    work = rdm2 if inplace else rdm2.copy()
    rdm1_t = rdm1.T
    for k in range(nmo):
        work[:, k, k, :] -= rdm1_t
    flat = work.reshape(nmo * nmo, -1)
    averaged = lib.transpose_sum(flat, inplace=True) * .5
    return rdm1, averaged.reshape((nmo,) * 4)
Esempio n. 9
0
def reorder_rdm(rdm1, rdm2, inplace=False):
    """Remove the ``rdm1.T`` contribution from ``rdm2[:, k, k, :]`` and
    average ``rdm2`` over the two particles.

    Args:
        rdm1: square array; ``nmo`` is taken from its first dimension.
        rdm2: array of shape ``(nmo, nmo, nmo, nmo)``.
        inplace: when true the caller's ``rdm2`` is modified by the
            intermediate steps; otherwise a copy is used.

    Returns:
        ``(rdm1, rdm2_sym)`` with ``rdm2_sym`` shaped ``(nmo,) * 4``.
    """
    nmo = rdm1.shape[0]
    dm2 = rdm2 if inplace else rdm2.copy()
    transposed_dm1 = rdm1.T
    for k in range(nmo):
        dm2[:, k, k, :] -= transposed_dm1

    # Particle permutation symmetry: averaging the (pq|rs) matrix with its
    # transpose reduces numerical round-off error.
    pair_matrix = dm2.reshape(nmo * nmo, -1)
    dm2_sym = lib.transpose_sum(pair_matrix, inplace=True) * .5
    return rdm1, dm2_sym.reshape((nmo,) * 4)
Esempio n. 10
0
    def test_transpose_sum(self):
        """Validate lib.hermi_sum and lib.transpose_sum against numpy."""
        axes = (0, 2, 1)

        def check(expected, result):
            # Positional arguments are evaluated left to right, so
            # ``expected`` is always built before an in-place call runs.
            self.assertAlmostEqual(abs(expected - result).max(), 0, 12)

        # Real stack of matrices
        a = numpy.random.random((3, 400, 400))
        check(a[0] + a[0].T, lib.hermi_sum(a[0]))
        check(a + a.transpose(axes), lib.hermi_sum(a, axes))
        check(a + a.transpose(axes), lib.hermi_sum(a, axes, inplace=True))

        # Complex stack of matrices
        a = (numpy.random.random((3, 400, 400))
             + numpy.random.random((3, 400, 400)) * 1j)
        check(a[0] + a[0].T.conj(), lib.hermi_sum(a[0]))
        check(a + a.transpose(axes).conj(), lib.hermi_sum(a, axes))
        # hermi=3 is expected to add the transpose without conjugation
        check(a + a.transpose(axes), lib.hermi_sum(a, axes, hermi=3))
        check(a + a.transpose(axes).conj(),
              lib.hermi_sum(a, axes, inplace=True))

        # Plain 2D matrix
        a = numpy.random.random((400, 400))
        b = a + a.T.conj()
        c = lib.transpose_sum(a)
        check(b, c)

        # Integer matrix: result must stay int32
        a = (a * 1000).astype(numpy.int32)
        b = a + a.T
        c = lib.transpose_sum(a)
        check(b, c)
        self.assertTrue(c.dtype == numpy.int32)
Esempio n. 11
0
def contract_2e(eri, fcivec, norb, nelec, link_index=None, orbsym=None,
                wfnsym=0):
    """Symmetry-adapted two-electron contraction for the spin-0 case.

    Without ``orbsym`` the call is delegated to
    ``direct_spin0.contract_2e``.  Otherwise the CI vector is split into
    one block per irrep pair ``(ir, wfnsym ^ ir)``, the blocks are
    contracted by ``FCIcontract_2e_symm1``, scattered back into a full
    ``(na, na)`` array and transpose-summed.

    Returns:
        The contracted vector in the original shape of ``fcivec``.
    """
    if orbsym is None:
        return direct_spin0.contract_2e(eri, fcivec, norb, nelec, link_index)

    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    eri = ao2mo.restore(4, eri, norb)
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    assert neleca == nelecb
    link_indexa = direct_spin0._unpack(norb, nelec, link_index)
    na, nlinka = link_indexa.shape[:2]
    eri_irs, rank_eri, irrep_eri = direct_spin1_symm.reorder_eri(
        eri, norb, orbsym)

    strsa = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    aidx, link_indexa = direct_spin1_symm.gen_str_irrep(
        strsa, orbsym, link_indexa, rank_eri, irrep_eri)

    # Fixed-length ctypes tables with one entry per irrep.
    ptr_table = ctypes.c_void_p * TOTIRREPS
    int_table = ctypes.c_int * TOTIRREPS
    linka_ptr = ptr_table(*[_ptr(tab) for tab in link_indexa])
    eri_ptrs = ptr_table(*[_ptr(blk) for blk in eri_irs])
    dimirrep = int_table(*[blk.shape[0] for blk in eri_irs])
    out_shape = fcivec.shape
    fcivec = fcivec.reshape((na, na), order='C')
    ci1new = numpy.zeros_like(fcivec)
    nas = int_table(*[idx.size for idx in aidx])

    # Gather the input blocks and allocate zeroed output blocks.
    ci0 = []
    ci1 = []
    for ir in range(TOTIRREPS):
        rows = aidx[ir]
        cols = aidx[wfnsym ^ ir]
        ci0.append(numpy.zeros((rows.size, cols.size)))
        ci1.append(numpy.zeros((rows.size, cols.size)))
        if rows.size > 0 and cols.size > 0:
            lib.take_2d(fcivec, rows, cols, out=ci0[ir])
    ci0_ptrs = ptr_table(*[_ptr(blk) for blk in ci0])
    ci1_ptrs = ptr_table(*[_ptr(blk) for blk in ci1])

    libfci.FCIcontract_2e_symm1(eri_ptrs, ci0_ptrs, ci1_ptrs,
                                ctypes.c_int(norb), nas, nas,
                                ctypes.c_int(nlinka), ctypes.c_int(nlinka),
                                linka_ptr, linka_ptr, dimirrep,
                                ctypes.c_int(wfnsym))

    # Scatter the non-empty result blocks back into the full array.
    for ir in range(TOTIRREPS):
        if ci0[ir].size > 0:
            lib.takebak_2d(ci1new, ci1[ir], aidx[ir], aidx[wfnsym ^ ir])
    return lib.transpose_sum(ci1new, inplace=True).reshape(out_shape)
Esempio n. 12
0
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    """Contract the one-electron operator ``f1e`` with a spin-0 CI vector.

    Args:
        f1e: one-electron matrix; packed with ``lib.pack_tril`` before use.
        fcivec: CI coefficients; must hold ``na**2`` elements.
        norb: number of orbitals.
        nelec: electron count, forwarded to ``_unpack``.
        link_index: optional link table, forwarded to ``_unpack``.

    Returns:
        The transpose-summed output of ``FCIcontract_1e_spin0`` in the
        shape of ``fcivec``.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    civec = numpy.asarray(fcivec, order='C')
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert civec.size == na**2
    ci1 = numpy.empty_like(civec)
    f1e_tril = lib.pack_tril(f1e)
    libfci.FCIcontract_1e_spin0(_ptr(f1e_tril), _ptr(civec), _ptr(ci1),
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), _ptr(link_index))
    # No extra factor of .5: the C kernel is understood to compute only
    # half of the contraction -- TODO confirm for FCIcontract_1e_spin0.
    return lib.transpose_sum(ci1, inplace=True).reshape(civec.shape)
Esempio n. 13
0
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    """One-electron contraction for the spin-0 CI solver.

    ``f1e`` is lower-triangular packed and passed, together with the CI
    vector and the link table from ``_unpack``, to the C routine
    ``FCIcontract_1e_spin0``.  The routine's output is transpose-summed
    and returned with the shape of ``fcivec``.
    """
    void_p = ctypes.c_void_p

    civec = numpy.asarray(fcivec, order='C')
    link_index = _unpack(norb, nelec, link_index)
    na, nlink = link_index.shape[:2]
    assert civec.size == na**2
    out = numpy.empty_like(civec)
    f1e_tril = lib.pack_tril(f1e)

    h_p = f1e_tril.ctypes.data_as(void_p)
    civec_p = civec.ctypes.data_as(void_p)
    out_p = out.ctypes.data_as(void_p)
    link_p = link_index.ctypes.data_as(void_p)
    libfci.FCIcontract_1e_spin0(h_p, civec_p, out_p,
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), link_p)
    # No extra factor of .5: the C kernel is understood to compute only
    # half of the contraction -- TODO confirm for FCIcontract_1e_spin0.
    return lib.transpose_sum(out, inplace=True).reshape(civec.shape)
Esempio n. 14
0
def contract_2e(eri, civec_strs, norb, nelec, link_index=None, orbsym=None):
    """Symmetry-adapted two-electron contraction for a selected-CI vector.

    Two C kernels accumulate into the same output buffer ``ci1``: the
    same-spin (aa|aa) part (only when ``nelec[0] > 1``) and the
    opposite-spin (bb|aa) part.  The accumulated result is transpose-summed
    in place and wrapped with ``selected_ci._as_SCIvector``.

    Args:
        eri: two-electron integrals, expanded with ``ao2mo.restore(1, ...)``.
        civec_strs: CI vector with its strings; unpacked by
            ``selected_ci._unpack``.
        norb: number of orbitals.
        nelec: electron count; replaced by the value ``_unpack`` returns.
        link_index: optional 4-tuple of link tables; built with
            ``selected_ci._all_linkstr_index`` when omitted.
        orbsym: orbital symmetry labels, forwarded to
            ``selected_ci_symm.reorder4irrep``.
    """
    ci_coeff, nelec, ci_strs = selected_ci._unpack(civec_strs, nelec)
    if link_index is None:
        link_index = selected_ci._all_linkstr_index(ci_strs, norb, nelec)
    # Only the alpha tables are used below; cd_indexb/dd_indexb are unused.
    cd_indexa, dd_indexa, cd_indexb, dd_indexb = link_index
    # Beta dimensions are taken to be identical to the alpha ones.
    na, nlinka = nb, nlinkb = cd_indexa.shape[:2]

    eri = ao2mo.restore(1, eri, norb)
    # Antisymmetrized integrals restricted to the strict lower-triangular
    # orbital pairs (tril_indices with k=-1).
    eri1 = eri.transpose(0, 2, 1, 3) - eri.transpose(0, 2, 3, 1)
    idx, idy = numpy.tril_indices(norb, -1)
    idx = idx * norb + idy
    eri1 = lib.take_2d(eri1.reshape(norb**2, -1), idx, idx) * 2
    # Replace eri1 by the average of itself and its transpose, in place.
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, dd_indexa, dimirrep = selected_ci_symm.reorder4irrep(
        eri1, norb, dd_indexa, orbsym, -1)
    fcivec = ci_coeff.reshape(na, nb)
    ci1 = numpy.zeros_like(fcivec)
    # (aa|aa)
    if nelec[0] > 1:
        # mb and mlinkb are assigned for symmetry with the alpha values but
        # are not used afterwards.
        ma, mlinka = mb, mlinkb = dd_indexa.shape[:2]
        libfci.SCIcontract_2e_aaaa_symm(
            eri1.ctypes.data_as(ctypes.c_void_p),
            fcivec.ctypes.data_as(ctypes.c_void_p),
            ci1.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(norb),
            ctypes.c_int(na), ctypes.c_int(nb), ctypes.c_int(ma),
            ctypes.c_int(mlinka), dd_indexa.ctypes.data_as(ctypes.c_void_p),
            dimirrep.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(len(dimirrep)))

    # Fold a one-body term derived from the integrals, scaled by
    # .5/nelec[0], into the k,k slices of a copy of the integrals.
    h_ps = numpy.einsum('pqqs->ps', eri) * (.5 / nelec[0])
    eri1 = eri.copy()
    for k in range(norb):
        eri1[:, :, k, k] += h_ps
        eri1[k, k, :, :] += h_ps
    eri1 = ao2mo.restore(4, eri1, norb)
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, cd_indexa, dimirrep = selected_ci_symm.reorder4irrep(
        eri1, norb, cd_indexa, orbsym)
    # (bb|aa)
    # The alpha link table is passed for both link-table arguments.
    libfci.SCIcontract_2e_bbaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                    fcivec.ctypes.data_as(ctypes.c_void_p),
                                    ci1.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(norb), ctypes.c_int(na),
                                    ctypes.c_int(nb), ctypes.c_int(nlinka),
                                    ctypes.c_int(nlinkb),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    dimirrep.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(len(dimirrep)))

    # Add the transpose of the accumulated result to itself, in place.
    lib.transpose_sum(ci1, inplace=True)
    return selected_ci._as_SCIvector(ci1.reshape(ci_coeff.shape), ci_strs)
Esempio n. 15
0
def incore(eri, dm, hermi=0):
    """Compute J and K matrices from in-memory real integrals.

    Args:
        eri: real integrals, either a 2D array with ``npair * npair``
            elements (4-fold symmetry) or a 1D array with
            ``npair * (npair + 1) // 2`` elements (8-fold symmetry), where
            ``npair = nao * (nao + 1) // 2``.
        dm: density matrix; ``nao`` is taken from its first dimension.
        hermi: symmetry flag handed to ``lib.hermi_triu`` for the outputs;
            with ``hermi == 0`` only ``vj`` is post-processed (with flag 1).

    Returns:
        ``(vj, vk)``, each of shape ``(nao, nao)``.

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    assert not numpy.iscomplexobj(eri)
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao, nao))
    vk = numpy.empty((nao, nao))
    npair = nao * (nao + 1) // 2

    has_s4 = eri.ndim == 2 and npair * npair == eri.size
    has_s8 = eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size
    if has_s4:
        # 4-fold symmetry: J is 'ijkl,kl->ij', K is 'ijkl,il->jk'
        fdrv = libcvhf.CVHFnrs4_incore_drv
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif has_s8:
        # 8-fold symmetry: the J kernel takes a packed lower-triangular
        # dm + dm^T whose diagonal entries are halved.
        fdrv = libcvhf.CVHFnrs8_incore_drv
        fvj = _fpointer('CVHFics8_tridm_vj')
        fvk = _fpointer('CVHFics8_jk_s2il' if hermi == 1
                        else 'CVHFics8_jk_s1il')
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        diag = numpy.arange(nao)
        tridm[diag * (diag + 1) // 2 + diag] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dm.shape, eri.shape))

    fdrv(_ptr(eri), _ptr(tridm), _ptr(vj), _ptr(dm), _ptr(vk),
         ctypes.c_int(nao), fvj, fvk)

    # vj is always filled from one triangle; vk only when hermi is set.
    vj = lib.hermi_triu(vj, hermi if hermi != 0 else 1)
    if hermi != 0:
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
Esempio n. 16
0
def incore(eri, dm, hermi=0):
    """Build the J and K matrices for ``dm`` from in-core real integrals.

    The integral layout is detected from ``eri.ndim`` and ``eri.size``:
    2D with ``npair**2`` elements selects the 4-fold-symmetry driver, 1D
    with ``npair*(npair+1)//2`` elements the 8-fold-symmetry driver
    (``npair = nao*(nao+1)//2``).  Anything else raises ``RuntimeError``.

    Returns:
        ``(vj, vk)`` as ``(nao, nao)`` arrays.  ``vj`` is always passed
        through ``lib.hermi_triu``; ``vk`` only when ``hermi != 0``.
    """
    void_p = ctypes.c_void_p

    assert not numpy.iscomplexobj(eri)
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao, nao))
    vk = numpy.empty((nao, nao))
    npair = nao * (nao + 1) // 2

    if eri.ndim == 2 and npair * npair == eri.size:
        # 4-fold symmetry eri: 'ijkl,kl->ij' for J, 'ijkl,il->jk' for K
        fdrv = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size:
        # 8-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        fvj = _fpointer('CVHFics8_tridm_vj')
        if hermi == 1:
            k_kernel = 'CVHFics8_jk_s2il'
        else:
            k_kernel = 'CVHFics8_jk_s1il'
        fvk = _fpointer(k_kernel)
        # Packed lower triangle of dm + dm^T with the diagonal halved.
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        rows = numpy.arange(nao)
        tridm[rows * (rows + 1) // 2 + rows] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s'
                           % (dm.shape, eri.shape))

    fdrv(eri.ctypes.data_as(void_p),
         tridm.ctypes.data_as(void_p),
         vj.ctypes.data_as(void_p),
         dm.ctypes.data_as(void_p),
         vk.ctypes.data_as(void_p),
         ctypes.c_int(nao), fvj, fvk)

    if hermi == 0:
        vj = lib.hermi_triu(vj, 1)
    else:
        vj = lib.hermi_triu(vj, hermi)
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
Esempio n. 17
0
def contract_2e(eri, civec_strs, norb, nelec, link_index=None, orbsym=None):
    """Symmetry-adapted two-electron contraction for a selected-CI vector.

    The same-spin (aa|aa) kernel (only when ``nelec[0] > 1``) and the
    opposite-spin (bb|aa) kernel both accumulate into ``ci1``; the result
    is then transpose-summed in place and returned through
    ``selected_ci._as_SCIvector``.

    Args:
        eri: two-electron integrals, expanded with ``ao2mo.restore(1, ...)``.
        civec_strs: CI vector with its strings; unpacked by
            ``selected_ci._unpack``.
        norb: number of orbitals.
        nelec: electron count; replaced by the value ``_unpack`` returns.
        link_index: optional 4-tuple of link tables; built with
            ``selected_ci._all_linkstr_index`` when omitted.
        orbsym: orbital symmetry labels, forwarded to
            ``selected_ci_symm.reorder4irrep``.
    """
    ci_coeff, nelec, ci_strs = selected_ci._unpack(civec_strs, nelec)
    if link_index is None:
        link_index = selected_ci._all_linkstr_index(ci_strs, norb, nelec)
    # cd_indexb and dd_indexb are unpacked but never used below.
    cd_indexa, dd_indexa, cd_indexb, dd_indexb = link_index
    # Beta dimensions are set equal to the alpha ones.
    na, nlinka = nb, nlinkb = cd_indexa.shape[:2]

    eri = ao2mo.restore(1, eri, norb)
    # Antisymmetrized integrals over strict lower-triangular orbital pairs.
    eri1 = eri.transpose(0,2,1,3) - eri.transpose(0,2,3,1)
    idx,idy = numpy.tril_indices(norb, -1)
    idx = idx * norb + idy
    eri1 = lib.take_2d(eri1.reshape(norb**2,-1), idx, idx) * 2
    # In-place average of eri1 with its transpose.
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, dd_indexa, dimirrep = selected_ci_symm.reorder4irrep(eri1, norb, dd_indexa, orbsym, -1)
    fcivec = ci_coeff.reshape(na,nb)
    ci1 = numpy.zeros_like(fcivec)
    # (aa|aa)
    if nelec[0] > 1:
        # mb and mlinkb are assigned but not used afterwards.
        ma, mlinka = mb, mlinkb = dd_indexa.shape[:2]
        libfci.SCIcontract_2e_aaaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                        fcivec.ctypes.data_as(ctypes.c_void_p),
                                        ci1.ctypes.data_as(ctypes.c_void_p),
                                        ctypes.c_int(norb),
                                        ctypes.c_int(na), ctypes.c_int(nb),
                                        ctypes.c_int(ma), ctypes.c_int(mlinka),
                                        dd_indexa.ctypes.data_as(ctypes.c_void_p),
                                        dimirrep.ctypes.data_as(ctypes.c_void_p),
                                        ctypes.c_int(len(dimirrep)))

    # One-body term built from the integrals, scaled by .5/nelec[0], is
    # added to the k,k slices of a copy of the integrals.
    h_ps = numpy.einsum('pqqs->ps', eri) * (.5/nelec[0])
    eri1 = eri.copy()
    for k in range(norb):
        eri1[:,:,k,k] += h_ps
        eri1[k,k,:,:] += h_ps
    eri1 = ao2mo.restore(4, eri1, norb)
    lib.transpose_sum(eri1, inplace=True)
    eri1 *= .5
    eri1, cd_indexa, dimirrep = selected_ci_symm.reorder4irrep(eri1, norb, cd_indexa, orbsym)
    # (bb|aa)
    # The alpha link table is passed for both link-table arguments.
    libfci.SCIcontract_2e_bbaa_symm(eri1.ctypes.data_as(ctypes.c_void_p),
                                    fcivec.ctypes.data_as(ctypes.c_void_p),
                                    ci1.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(norb),
                                    ctypes.c_int(na), ctypes.c_int(nb),
                                    ctypes.c_int(nlinka), ctypes.c_int(nlinkb),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    cd_indexa.ctypes.data_as(ctypes.c_void_p),
                                    dimirrep.ctypes.data_as(ctypes.c_void_p),
                                    ctypes.c_int(len(dimirrep)))

    # Add the transpose of the accumulated result to itself, in place.
    lib.transpose_sum(ci1, inplace=True)
    return selected_ci._as_SCIvector(ci1.reshape(ci_coeff.shape), ci_strs)
Esempio n. 18
0
def contract_2e(eri, fcivec, norb, nelec, link_index=None, orbsym=None, wfnsym=0):
    """Symmetry-adapted two-electron contraction for the spin-0 case.

    Without ``orbsym`` the call is delegated to
    ``direct_spin0.contract_2e``.  Otherwise the CI vector is split into
    one block per irrep pair ``(ir, wfnsym ^ ir)``, contracted by
    ``FCIcontract_2e_symm1``, scattered back into an ``(na, na)`` array and
    transpose-summed.  The number of irreps is ``len(eri_irs)`` as returned
    by ``direct_spin1_symm.reorder_eri``.

    Returns:
        The contracted vector in the original shape of ``fcivec``.
    """
    if orbsym is None:
        return direct_spin0.contract_2e(eri, fcivec, norb, nelec, link_index)

    eri = ao2mo.restore(4, eri, norb)
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    assert(neleca == nelecb)
    link_indexa = direct_spin0._unpack(norb, nelec, link_index)
    na, nlinka = link_indexa.shape[:2]
    eri_irs, rank_eri, irrep_eri = direct_spin1_symm.reorder_eri(eri, norb, orbsym)
    totirrep = len(eri_irs)

    strsa = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    aidx, link_indexa = direct_spin1_symm.gen_str_irrep(strsa, orbsym, link_indexa,
                                                        rank_eri, irrep_eri, totirrep)

    # ctypes tables with one entry per irrep
    Tirrep = ctypes.c_void_p*totirrep
    linka_ptr = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in link_indexa])
    eri_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in eri_irs])
    dimirrep = (ctypes.c_int*totirrep)(*[x.shape[0] for x in eri_irs])
    fcivec_shape = fcivec.shape
    fcivec = fcivec.reshape((na,na), order='C')
    ci1new = numpy.zeros_like(fcivec)
    # NOTE(review): this table is sized with a hard-coded 8 while every
    # other per-irrep table uses totirrep; construction fails if aidx has
    # more than 8 entries.  Confirm whether totirrep is intended here.
    nas = (ctypes.c_int*8)(*[x.size for x in aidx])

    # Gather the input blocks and allocate zeroed output blocks.
    ci0 = []
    ci1 = []
    for ir in range(totirrep):
        ma, mb = aidx[ir].size, aidx[wfnsym^ir].size
        ci0.append(numpy.zeros((ma,mb)))
        ci1.append(numpy.zeros((ma,mb)))
        if ma > 0 and mb > 0:
            lib.take_2d(fcivec, aidx[ir], aidx[wfnsym^ir], out=ci0[ir])
    ci0_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci0])
    ci1_ptrs = Tirrep(*[x.ctypes.data_as(ctypes.c_void_p) for x in ci1])
    # The alpha sizes and link tables are passed twice (for both spins).
    libfci.FCIcontract_2e_symm1(eri_ptrs, ci0_ptrs, ci1_ptrs,
                                ctypes.c_int(norb), nas, nas,
                                ctypes.c_int(nlinka), ctypes.c_int(nlinka),
                                linka_ptr, linka_ptr, dimirrep,
                                ctypes.c_int(totirrep), ctypes.c_int(wfnsym))
    # Scatter the non-empty result blocks back into the full array.
    for ir in range(totirrep):
        if ci0[ir].size > 0:
            lib.takebak_2d(ci1new, ci1[ir], aidx[ir], aidx[wfnsym^ir])
    return lib.transpose_sum(ci1new, inplace=True).reshape(fcivec_shape)
Esempio n. 19
0
def make_hdiag(h1e, eri, norb, nelec):
    """Diagonal of the Hamiltonian for the spin-0 solver, as a flat array.

    Args:
        h1e: one-electron matrix.
        eri: two-electron integrals, expanded with ``ao2mo.restore(1, ...)``.
        norb: number of orbitals.
        nelec: total electron count (halved to get the alpha count) or an
            ``(neleca, nelecb)`` pair whose entries must be equal.

    Returns:
        1D array of length ``na * na`` where ``na`` is the number of
        strings from ``cistring.gen_strings4orblist``.
    """
    def _ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    if isinstance(nelec, (int, numpy.number)):
        neleca = nelec // 2
    else:
        neleca, nelecb = nelec
        assert neleca == nelecb

    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    strs = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    na = len(strs)
    hdiag = numpy.empty((na, na))
    # Coulomb-like (iijj) and exchange-like (ijji) diagonals of eri.
    jdiag = numpy.asarray(numpy.einsum('iijj->ij', eri), order='C')
    kdiag = numpy.asarray(numpy.einsum('ijji->ij', eri), order='C')
    libfci.FCImake_hdiag(_ptr(hdiag), _ptr(h1e), _ptr(jdiag), _ptr(kdiag),
                         ctypes.c_int(norb), ctypes.c_int(na),
                         ctypes.c_int(neleca), _ptr(strs))
    # symmetrize hdiag to reduce numerical error
    symmetric = lib.transpose_sum(hdiag, inplace=True) * .5
    return symmetric.ravel()
Esempio n. 20
0
def make_hdiag(h1e, eri, norb, nelec):
    """Compute the flattened, symmetrized Hamiltonian diagonal.

    ``nelec`` is either a number (the alpha count is ``nelec // 2``) or a
    pair ``(neleca, nelecb)`` with equal entries.  The diagonal is filled
    by the C routine ``FCImake_hdiag`` into an ``(na, na)`` array, averaged
    with its transpose and returned raveled.
    """
    void_p = ctypes.c_void_p

    if not isinstance(nelec, (int, numpy.number)):
        neleca, nelecb = nelec
        assert neleca == nelecb
    else:
        neleca = nelec // 2

    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    strs = numpy.asarray(cistring.gen_strings4orblist(range(norb), neleca))
    na = len(strs)
    hdiag = numpy.empty((na, na))
    jdiag = numpy.asarray(numpy.einsum('iijj->ij', eri), order='C')
    kdiag = numpy.asarray(numpy.einsum('ijji->ij', eri), order='C')

    hdiag_p = hdiag.ctypes.data_as(void_p)
    h1e_p = h1e.ctypes.data_as(void_p)
    jdiag_p = jdiag.ctypes.data_as(void_p)
    kdiag_p = kdiag.ctypes.data_as(void_p)
    strs_p = strs.ctypes.data_as(void_p)
    libfci.FCImake_hdiag(hdiag_p, h1e_p, jdiag_p, kdiag_p,
                         ctypes.c_int(norb), ctypes.c_int(na),
                         ctypes.c_int(neleca), strs_p)
    # symmetrize hdiag to reduce numerical error
    averaged = lib.transpose_sum(hdiag, inplace=True) * .5
    return averaged.ravel()
Esempio n. 21
0
def kernel_ms0(fci, h1e, eri, norb, nelec, ci0=None, link_index=None,
               tol=None, lindep=None, max_cycle=None, max_space=None,
               nroots=None, davidson_only=None, pspace_size=None,
               max_memory=None, verbose=None, ecore=0, **kwargs):
    """Driver for the spin-0 CI eigenvalue problem.

    The solver first diagonalizes the Hamiltonian in a small "pspace".  If
    that space covers the whole problem and no initial guess was given, the
    pspace solution is returned directly when it passes ``_check_``;
    otherwise the eigenproblem is handed to ``fci.eig``.

    Args:
        fci: solver object supplying defaults (``nroots``,
            ``davidson_only``, ``pspace_size``, ``conv_tol``, ``lindep``,
            ``max_cycle``, ``max_space``, ``max_memory``, ``stdout``,
            ``verbose``) and the methods ``make_hdiag``, ``pspace``,
            ``make_precond``, ``absorb_h1e``, ``contract_2e``, ``eig`` and,
            optionally, ``get_init_guess``.  ``fci.spin`` must be ``None``
            or 0.
        h1e, eri: one- and two-electron integrals.
        norb, nelec: number of orbitals and electrons.
        ci0: optional initial guess: one array with ``na*na`` elements or
            a sequence of such arrays.
        link_index: optional link table, forwarded to ``_unpack``.
        tol, lindep, max_cycle, max_space, max_memory, verbose: overrides
            for the corresponding ``fci`` attributes.
        nroots, davidson_only, pspace_size: overrides for the corresponding
            ``fci`` attributes.
        ecore: constant added to the returned energies.
        **kwargs: forwarded to ``fci.eig``.

    Returns:
        ``(e + ecore, civec)``; for ``nroots > 1`` the second element is a
        list of ``(na, na)`` arrays (and the first holds several energies).
    """
    if nroots is None: nroots = fci.nroots
    if davidson_only is None: davidson_only = fci.davidson_only
    if pspace_size is None: pspace_size = fci.pspace_size

    assert(fci.spin is None or fci.spin == 0)

    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)

    addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag, max(pspace_size,nroots))
    if pspace_size > 0:
        pw, pv = scipy.linalg.eigh(h0)
    else:
        pw = pv = None

    if pspace_size >= na*na and ci0 is None and not davidson_only:
        # The degenerated wfn can break symmetry.  The davidson iteration
        # with proper initial guess doesn't have this issue
        if na*na == 1:
            return pw[0]+ecore, pv[:,0].reshape(1,1)
        elif nroots > 1:
            civec = numpy.empty((nroots,na*na))
            civec[:,addr] = pv[:,:nroots].T
            civec = civec.reshape(nroots,na,na)
            try:
                return pw[:nroots]+ecore, [_check_(ci) for ci in civec]
            except ValueError:
                # A root failed the _check_ test: fall through to the
                # iterative solver below.
                pass
        elif abs(pw[0]-pw[1]) > 1e-12:
            civec = numpy.empty((na*na))
            civec[addr] = pv[:,0]
            civec = civec.reshape(na,na)
            civec = lib.transpose_sum(civec) * .5
            # direct diagonalization may lead to triplet ground state
            # TODO: optimize initial guess.  Using pspace vector as initial
            # guess may have spin problems.  The 'ground state' of pspace
            # vector may have different spin state to the true ground state.
            try:
                return pw[0]+ecore, _check_(civec.reshape(na,na))
            except ValueError:
                # Same fallback as above.
                pass

    precond = fci.make_precond(hdiag, pw, pv, addr)

    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)
    def hop(c):
        hc = fci.contract_2e(h2e, c.reshape(na,na), norb, nelec, link_index)
        return hc.ravel()

    # TODO: check spin of initial guess
    if ci0 is None:
        if hasattr(fci, 'get_init_guess'):
            ci0 = fci.get_init_guess(norb, nelec, nroots, hdiag)
        else:
            ci0 = []
            for i in range(nroots):
                # The shape must be a tuple: numpy.zeros(na, na) would take
                # the second ``na`` as the dtype and raise TypeError.
                x = numpy.zeros((na,na))
                if addr[i] == 0:
                    x[0,0] = 1
                else:
                    # Symmetric pair of entries, each sqrt(.5), so the
                    # guess has unit norm when addra != addrb.
                    addra = addr[i] // na
                    addrb = addr[i] % na
                    x[addra,addrb] = x[addrb,addra] = numpy.sqrt(.5)
                ci0.append(x.ravel())
    else:
        if isinstance(ci0, numpy.ndarray) and ci0.size == na*na:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    if tol is None: tol = fci.conv_tol
    if lindep is None: lindep = fci.lindep
    if max_cycle is None: max_cycle = fci.max_cycle
    if max_space is None: max_space = fci.max_space
    if max_memory is None: max_memory = fci.max_memory
    if verbose is None: verbose = logger.Logger(fci.stdout, fci.verbose)
    e, c = fci.eig(hop, ci0, precond, tol=tol, lindep=lindep,
                   max_cycle=max_cycle, max_space=max_space, nroots=nroots,
                   max_memory=max_memory, verbose=verbose, follow_state=True,
                   **kwargs)
    if nroots > 1:
        return e+ecore, [_check_(ci.reshape(na,na)) for ci in c]
    else:
        return e+ecore, _check_(c.reshape(na,na))
Esempio n. 22
0
def get_eri(mydf, kpts=None, compact=True):
    """Assemble two-electron integrals for a set of four k-points.

    Each branch sums two contributions into the same accumulator(s): one
    from the blocks yielded by ``mydf.sr_loop`` and one from the blocks
    yielded by ``mydf.pw_loop`` weighted by ``tools.get_coulG(...)/cell.vol``.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called when
            ``mydf._cderi`` is ``None``.
        kpts: ``None`` (all four k-points zero), a single k-point of shape
            ``(3,)`` (used four times), or anything reshapeable to ``(4, 3)``.
        compact: only consulted in the gamma-point branch; when false the
            result is expanded with ``ao2mo.restore(1, ...)`` and reshaped
            to ``(nao**2, -1)``.

    Returns:
        A real array in the gamma-point branch, a complex array with
        ``nao**2`` rows otherwise.
    """
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4,3))
    elif numpy.shape(kpts) == (3,):
        kptijkl = numpy.vstack([kpts]*4)
    else:
        kptijkl = numpy.reshape(kpts, (4,3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    auxcell = mydf.auxcell
    nao = cell.nao_nr()
    # naux is computed but not used in the visible code.
    naux = auxcell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    # 80% of the remaining memory budget, but never less than 2000.
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < 1e-9:
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eriR, 1)
        # Add the transpose so both orderings of the product are included.
        eriR = lib.transpose_sum(eriR, inplace=True)

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        # The memory budget is recomputed here, without the 2000 floor.
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8
        trilidx = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            # Keep only the lower-triangular orbital pairs.
            pqkR = numpy.asarray(pqkR.reshape(nao,nao,-1)[trilidx], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao,nao,-1)[trilidx], order='C')
            # sqrt of the weight on each factor gives the full weight in
            # the products below.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.dot(pqkR, pqkR.T, 1, eriR, 1)
            lib.dot(pqkI, pqkI.T, 1, eriR, 1)
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < 1e-9) and (abs(kptj-kptk).sum() < 1e-9):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
        # Drop the references to the last loop blocks.
        LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
# rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
# rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        return (eriR.reshape((nao,)*4).transpose(0,1,3,2) +
                eriI.reshape((nao,)*4).transpose(0,1,3,2)*1j).reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        # The (i,j) and (k,l) loops are advanced in lockstep.
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
        LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        # A smaller fraction (.4) is used here, where two pw_loop generators
        # are active at once.
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .4

        # q0 and q1 are unpacked but not used.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory),
                         mydf.pw_loop(cell, mydf.gs,-kptijkl[2:], max_memory=max_memory)):
            # The full weight is applied to one factor only.
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
# rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
#                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
#                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
# rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
        return eriR + eriI*1j
Esempio n. 23
0
def make_hdiag(h1e, eri, ci_strs, norb, nelec):
    """Return the diagonal from ``select_ci.make_hdiag``, averaged with its transpose.

    The flat diagonal is viewed as a square ``(na, na)`` matrix, with ``na``
    the length of ``ci_strs[0]``; ``lib.transpose_sum`` is applied to that
    matrix in place and the whole array is then scaled by one half.
    """
    diag = select_ci.make_hdiag(h1e, eri, ci_strs, norb, nelec)
    nstr = len(ci_strs[0])
    # The return value of transpose_sum is discarded, so this relies on the
    # reshape being a view onto ``diag`` (presumably diag is contiguous).
    square_view = diag.reshape(nstr, nstr)
    lib.transpose_sum(square_view, inplace=True)
    diag *= .5
    return diag
Esempio n. 24
0
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Back-transform the 2-particle density-matrix blocks ``d2`` from MO to AO basis.

    Dense reference of the quantity being assembled (kept from the original
    author's notes)::

        dm2 = ccsd_rdm._make_rdm2(mycc, None, d2, with_dm1=False)
        dm2 = numpy.einsum('pi,ijkl->pjkl', mo_coeff, dm2)
        dm2 = numpy.einsum('pj,ijkl->ipkl', mo_coeff, dm2)
        dm2 = numpy.einsum('pk,ijkl->ijpl', mo_coeff, dm2)
        dm2 = numpy.einsum('pl,ijkl->ijkp', mo_coeff, dm2)
        dm2 = dm2 + dm2.transpose(1,0,2,3)
        dm2 = dm2 + dm2.transpose(0,1,3,2)
        return ao2mo.restore(4, dm2*.5, nmo)

    The work is done out of core in three passes using a temporary HDF5 file:
    pass 1 transforms the ``dvvvv`` block, pass 2 transforms the blocks with
    an occupied leading index, pass 3 transforms the remaining index pair and
    adds the transpose.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        mo_coeff: 2D array of shape ``(nao, nmo)``.
        fsave: optional HDF5-like file in which dataset ``'dm2'`` is created.
            When ``None`` a temporary file is used and the data is returned
            as an in-memory array.

    Returns:
        The content of dataset ``'dm2'`` (shape ``(nao_pair, nao_pair)``) when
        ``fsave`` is ``None``; otherwise ``fsave`` itself.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8.  Prefer it where it still exists
    # (so older interpreters behave exactly as before) and fall back to
    # time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time1 = cpu_clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = dovov.shape[:2]
    mo_coeff = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    nvir_pair = nvir * (nvir+1) //2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()
    def _trans(vin, orbs_slice, out=None):
        # Run the C driver on every row of ``vin``; each row yields
        # ``nao_pair`` output values.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow,nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int*4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*1e6/8/(nao_pair+nmo**2))
    blksize = min(nvir_pair, max(ccsd.BLKMIN, blksize))
    chunks_vv = (int(min(blksize,4e8/blksize)), blksize)
    fswap.create_dataset('v', (nao_pair,nvir_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        fswap['v'][:,p0:p1] = _trans(lib.unpack_tril(_cp(dvvvv[p0:p1])),
                                     (nocc,nmo,nocc,nmo)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

# transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory*1e6/8/(nao_pair+nmo**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    fswap.create_dataset('o', (nmo,nocc,nao_pair), 'f8', chunks=(nocc,nocc,blksize))
    buf1 = numpy.zeros((nocc,nocc,nmo,nmo))
    buf1[:,:,:nocc,:nocc] = doooo
    buf1[:,:,nocc:,nocc:] = _cp(doovv)
    buf1 = _trans(buf1.reshape(nocc**2,-1), (0,nmo,0,nmo))
    fswap['o'][:nocc] = buf1.reshape(nocc,nocc,nao_pair)
    dovoo = numpy.asarray(dooov).transpose(2,3,0,1)
    for p0, p1 in lib.prange(nocc, nmo, nocc):
        buf1 = numpy.zeros((nocc,p1-p0,nmo,nmo))
        buf1[:,:,:nocc,:nocc] = dovoo[:,p0-nocc:p1-nocc]
        buf1[:,:,nocc:,:nocc] = dovvo[:,p0-nocc:p1-nocc]
        buf1[:,:,:nocc,nocc:] = dovov[:,p0-nocc:p1-nocc]
        buf1[:,:,nocc:,nocc:] = dovvv[:,p0-nocc:p1-nocc]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocc,-1)
        buf1 = _trans(buf1, (0,nmo,0,nmo))
        fswap['o'][p0:p1] = buf1.reshape(p1-p0,nocc,nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

# transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    gsave = fsave.create_dataset('dm2', (nao_pair,nao_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmo,nmo))
        buf1[:,nocc:,nocc:] = lib.unpack_tril(_cp(fswap['v'][p0:p1]))
        buf1[:,:,:nocc] = fswap['o'][:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, (0,nmo,0,nmo))
        if p0 > 0:
            # Fold this row block into the column block already written by
            # earlier iterations, and copy the summed values back.
            buf1 = _cp(gsave[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            gsave[:p0,p0:p1] = buf1
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        gsave[p0:p1] = buf2
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # Dataset.value was removed in h5py 3.0; ``[()]`` reads the full
        # dataset on both old and new h5py versions.
        return fsave['dm2'][()]
    else:
        return fsave
Esempio n. 25
0
def make_hdiag(h1e, eri, ci_strs, norb, nelec):
    """Return the diagonal from ``selected_ci.make_hdiag``, averaged with its transpose.

    The flat diagonal is viewed as a square ``(na, na)`` matrix, with ``na``
    the length of ``ci_strs[0]``; ``lib.transpose_sum`` is applied to that
    matrix in place and the whole array is then scaled by one half.
    """
    diag = selected_ci.make_hdiag(h1e, eri, ci_strs, norb, nelec)
    nstr = len(ci_strs[0])
    # The return value of transpose_sum is discarded, so this relies on the
    # reshape being a view onto ``diag`` (presumably diag is contiguous).
    square_view = diag.reshape(nstr, nstr)
    lib.transpose_sum(square_view, inplace=True)
    diag *= .5
    return diag
Esempio n. 26
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density-matrix blocks with ERI blocks into Ioo, Ivv, Ivo and Xvo.

    Missing inputs are built on demand: ``eris`` via ``ccsd._ERIS(mycc)``,
    ``d1`` via ``ccsd_rdm.gamma1_intermediates`` and ``d2`` via
    ``ccsd_rdm.gamma2_incore``.  The ``#:`` comments throughout give the
    einsum expression that the following ``lib.dot`` calls implement.
    NOTE(review): presumably these intermediates feed the CCSD nuclear
    gradient code -- confirm against the callers.

    Args:
        mycc: object providing ``stdout`` and ``verbose`` (and whatever the
            helper constructors above require).
        t1: array whose shape is ``(nocc, nvir)``.
        t2, l1, l2: only forwarded to the ``ccsd_rdm`` helpers.
        eris: object exposing the blocks ``oooo``, ``ooov``, ``oovv``,
            ``ovov``, ``ovoo``, ``ovvv`` and ``vvvv``.
        d1: 4-tuple ``(doo, dov, dvo, dvv)``.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  Ioo, Ivv and
        Ivo are multiplied by -1 before returning, and the sign-flipped Ivo
        is added into Xvo.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    # --- contributions from the oooo density block and the 1-particle doo ---
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T,
        d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # --- contributions from the ooov density block ---
    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
    Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
    tmpooov = _cp(eris_ooov.transpose(0, 1, 3, 2))
    Ioo += lib.dot(tmpooov.reshape(-1, nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
    eris_oooo = tmp = None

    d_ooov = d_ooov + d_ooov.transpose(1, 0, 2, 3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc, -1), d_ooov.reshape(nocc, -1).T)
    Xvo += lib.dot(
        eris_ovov.reshape(-1, nvir).T,
        _cp(d_ooov.transpose(0, 2, 3, 1).reshape(nocc, -1)).T)
    d_ooov = None

    # --- contributions from the oovv density block ---
    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1, 0, 3, 2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                       d_oovv[i].reshape(nocc, -1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
    Ivo += lib.dot(d_oovv.reshape(-1, nvir).T, tmpooov.reshape(-1, nocc))
    # d_oovv is replaced by its precontracted form; it is used again in the
    # ovvv loop below.
    d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir,
                                              nvir)).reshape(nocc, nocc, -1)
    eris_ooov = tmpooov = None

    # --- build d_ovov = dovov + dovvo.transpose(0,1,3,2), then add its transpose ---
    blksize = 4
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov, nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T,
        _cp(eris.ovoo).reshape(-1, nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))

    # --- ovvv blocks, processed `blksize` occupied indices at a time ---
    nvir_pair = nvir * (nvir + 1) // 2
    bufe_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    bufc_ovvv = numpy.empty((blksize, nvir, nvir_pair))
    # bufc_ovvv is re-pointed at bufe_ovvv's memory, so the two buffers alias.
    # Inside the loop c_ovvv (a slice of bufc_ovvv) must therefore be fully
    # consumed before eris_ovvv (a slice of bufe_ovvv) is filled.
    bufc_ovvv.data = bufe_ovvv.data
    c_vvvo = numpy.empty((nvir_pair, nvir, nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            lib.dot(dovvv[p0 + i].reshape(nvir, -1),
                    eris_oovv[p0 + i].reshape(nocc, -1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1 - p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1, nvir, nvir), out=c_ovvv)
        # Blocked transpose of c_ovvv into c_vvvo, which is kept for the
        # vvvv loop further down.
        for i0, i1, in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE // (p1 - p0) + 1):
                c_vvvo[i0:i1, j0:j1, p0:p1] = c_ovvv[:, j0:j1,
                                                     i0:i1].transpose(2, 1, 0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[p0 + i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)

        # From here on the shared buffer holds the unpacked integrals;
        # c_ovvv is no longer valid.
        eris_ovvv = bufe_ovvv[:p1 - p0]
        lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair),
                        out=eris_ovvv.reshape(-1, nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0, 1, 3, 2))
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        # Reuse d_ovvo's storage for the transposed ovov integrals.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0 + i])
            e_ovvo[i] = eris_ovov[p0 + i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(
            d_ovvv.reshape(-1, nvir).T, e_ovvo[:p1 - p0].reshape(-1, nocc), 1,
            Ivo, 1)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir), 1, Ivv, 1)

        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))
    d_oovv = d_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    # --- vvvv blocks, processed `blksize` virtual indices at a time ---
    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize * nvir, nvir, nocc))
    bufe_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    bufd_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of the lower-triangular pair index for rows p0 and p1.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Diagonal pairs (i, i) get half the weight of the other pairs.
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv, out=bufd_vvvv[:off1 - off0])
        eris_vvvv = lib.unpack_tril(eris.vvvv[off0:off1],
                                    out=bufe_vvvv[:off1 - off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1 - off0]
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1, in prange(0, nvir, BLKSIZE // nocc + 1):
                eris_vvvo[i0 - off0:i1 - off0,
                          j0:j1, :] = eris_ovvv[:, j0:j1,
                                                i0:i1].transpose(2, 1, 0)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)

    # Overall sign flip; Xvo receives the already sign-flipped Ivo.
    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 27
0
def get_eri(mydf, kpts=None, compact=True):
    """Assemble the AO two-electron integrals for one quartet of k-points.

    ``kpts`` may be ``None`` (all four k-points zero), a single 3-vector
    (used for all four indices) or anything reshapeable to ``(4, 3)``.
    Every branch sums two contributions: one accumulated from
    ``mydf.sr_loop`` and one accumulated from ``mydf.pw_loop`` weighted with
    ``tools.get_coulG(...) / cell.vol``.

    Returns:
        For the all-zero k-point case a real ``(nao_pair, nao_pair)`` array,
        or its ``ao2mo.restore(1, ...)`` expansion reshaped to ``nao**2``
        rows when ``compact`` is false.  Otherwise a complex array with
        ``nao**2`` rows.
    """
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4, 3))
    elif numpy.shape(kpts) == (3, ):
        kptijkl = numpy.vstack([kpts] * 4)
    else:
        kptijkl = numpy.reshape(kpts, (4, 3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    kpt_pair_ij = kptijkl[:2]
    kpt_pair_kl = kptijkl[2:]
    auxcell = mydf.auxcell
    nao = cell.nao_nr()
    # naux is not referenced below; the call is kept so the sequence of
    # operations on mydf/auxcell is unchanged.
    naux = auxcell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    nao_sq = nao * nao
    mem_avail = mydf.max_memory - lib.current_memory()[0]
    max_memory = max(2000, mem_avail * .8)

    all_kpts_zero = abs(kptijkl).sum() < 1e-9
    exchange_like = ((abs(kpti - kptl).sum() < 1e-9) and
                     (abs(kptj - kptk).sum() < 1e-9))

    # ------------------------------------------------------------------
    # gamma point: the integral is real and has s4 symmetry
    if all_kpts_zero:
        eri_real = numpy.zeros((nao_pair, nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kpt_pair_ij, max_memory,
                                                   True):
            lib.ddot(j3cR.T, LpqR, 1, eri_real, 1)
            LpqR = LpqI = j3cR = j3cI = None
        eri_real = lib.transpose_sum(eri_real, inplace=True)

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8
        lower = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(cell, mydf.gs, kpt_pair_ij,
                                               max_memory=max_memory):
            # keep only the lower-triangular AO pairs
            pqkR = numpy.asarray(pqkR.reshape(nao, nao, -1)[lower], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao, nao, -1)[lower], order='C')
            sqrt_vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= sqrt_vG
            pqkI *= sqrt_vG
            lib.dot(pqkR, pqkR.T, 1, eri_real, 1)
            lib.dot(pqkI, pqkI.T, 1, eri_real, 1)
            pqkR = pqkI = None
        if compact:
            return eri_real
        return ao2mo.restore(1, eri_real, nao).reshape(nao**2, -1)

    # ------------------------------------------------------------------
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif exchange_like:
        eriR = numpy.zeros((nao_sq, nao_sq))
        eriI = numpy.zeros((nao_sq, nao_sq))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kpt_pair_ij, max_memory,
                                                   False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
            LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 in mydf.pw_loop(cell, mydf.gs, kpt_pair_ij,
                                               max_memory=max_memory):
            sqrt_vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= sqrt_vG
            pqkI *= sqrt_vG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        four_index = (nao, ) * 4
        real_part = eriR.reshape(four_index).transpose(0, 1, 3, 2)
        imag_part = eriI.reshape(four_index).transpose(0, 1, 3, 2)
        return (real_part + imag_part * 1j).reshape(nao**2, -1)

    # ------------------------------------------------------------------
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis,
    # 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    else:
        eriR = numpy.zeros((nao_sq, nao_sq))
        eriI = numpy.zeros((nao_sq, nao_sq))
        sr_pairs = lib.izip(mydf.sr_loop(kpt_pair_ij, max_memory, False),
                            mydf.sr_loop(kpt_pair_kl, max_memory, False))
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in sr_pairs:
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj - kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .4

        pw_pairs = lib.izip(
            mydf.pw_loop(cell, mydf.gs, kpt_pair_ij, max_memory=max_memory),
            mydf.pw_loop(cell, mydf.gs, -kpt_pair_kl, max_memory=max_memory))
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in pw_pairs:
            vG = coulG[p0:p1]
            pqkR *= vG
            pqkI *= vG
            # rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
            #                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
            #                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
            # rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = rskR = rskI = None
        return eriR + eriI * 1j
Esempio n. 28
0
File: rks.py Progetto: wwjCMP/pyscf
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None,
            verbose=logger.WARN):
    """Build, for each atom in ``atmlst``, the 3-component matrix ``h1ao + veff``.

    For every atom the routine combines
      * one-electron derivative integrals (``int1e_ipkin``, ``int1e_ipnuc``,
        ``int1e_iprinv``),
      * J (and, when the hybrid coefficient is non-negligible, K) terms from
        ``int2e_ip1`` contracted with the closed-shell density, and
      * the LDA or GGA exchange-correlation terms integrated on ``mf.grids``,
    and adds the transpose over the two AO axes to each part.

    Args:
        mf: mean-field object providing ``mol``, ``grids``, ``xc``,
            ``_numint``, ``stdout`` and ``verbose``.
        mo_coeff: 2D array of shape ``(nao, nmo)``.
        mo_occ: occupation array; columns with ``mo_occ > 0`` form the density.
        chkfile: when given, each result is saved under ``'scf_h1ao/<ia>'``
            via ``lib.chkfile.save`` instead of being collected in memory.
        atmlst: atom indices to process; defaults to all atoms of ``mf.mol``.
        verbose: a ``logger.Logger`` instance or a verbosity level.

    Returns:
        The list of per-atom arrays when ``chkfile`` is ``None``, otherwise
        ``chkfile``.

    Raises:
        NotImplementedError: if the functional type is neither LDA nor GGA.
    """
    def _gga_weights(rho, rho1, weight, vxc, fxc):
        # Weights that multiply (ao, d/dx ao, d/dy ao, d/dz ao) for the
        # second-order GGA kernel acting on the response density rho1.
        vgamma = vxc[1]
        frr, frg, fgg = fxc[:3]
        sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
        wv = numpy.empty((4, weight.size))
        wv[0] = frr * rho1[0]
        wv[0] += frg * sigma1 * 2
        wv[1:] = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
        wv[1:] += vgamma * rho1[1:] * 2
        wv *= weight
        return wv

    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    occ_orbs = mo_coeff[:, mo_occ > 0]
    dm0 = numpy.dot(occ_orbs, occ_orbs.T) * 2

    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    max_memory = 4000

    h1a = -(mol.intor('int1e_ipkin', comp=3) +
            mol.intor('int1e_ipnuc', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for ia in atmlst:
        shl0, shl1, p0, p1 = offsetdic[ia]

        # ---- one-electron part ----
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        # ---- Coulomb / exact-exchange part ----
        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        int2e_ip1 = mol._add_suffix('int2e_ip1')
        with_exx = abs(hyb) > 1e-10
        if with_exx:
            scripts = ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il')
            dms = (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0)
        else:
            scripts = ('ji->s2kl', 'lk->s1ij')
            dms = (-dm0[:,p0:p1], -dm0)
        jk_mats = _vhf.direct_bindm(int2e_ip1, 's2kl', scripts, dms,
                                    3, mol._atm, mol._bas, mol._env,
                                    shls_slice=shls_slice)
        if with_exx:
            vj1, vj2, vk1, vk2 = jk_mats
        else:
            vj1, vj2 = jk_mats
        for comp in range(3):
            lib.hermi_triu(vj1[comp], 1)
        if with_exx:
            veff = vj1 - hyb * .5 * vk1
            veff[:, p0:p1] += vj2 - hyb * .5 * vk2
        else:
            veff = vj1
            veff[:, p0:p1] += vj2

        # ---- exchange-correlation part ----
        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:, :, p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight * frr * rho1)
                aow[:, :, p0:p1] += numpy.einsum('xpi,p->xpi',
                                                 ao[1:, :, p0:p1],
                                                 weight * vrho)
                for comp in range(3):
                    veff[comp] += lib.dot(-aow[comp].T, ao[0])
                half = aow = None

        elif xctype == 'GGA':
            # For each Cartesian component: the first-derivative AO index
            # followed by the three matching second-derivative AO indices.
            deriv_rows = ((1, XX, XY, XZ), (2, YX, YY, YZ), (3, ZX, ZY, ZZ))
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]

                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1_xyz = [
                    numpy.einsum('xpi,pi->xp', ao[list(rows), :, p0:p1], half)
                    for rows in deriv_rows
                ]
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                for grad in (1, 2, 3):
                    half = lib.dot(ao[grad], dm0[:, p0:p1].copy())
                    for comp, rho1 in enumerate(rho1_xyz):
                        rho1[grad] += numpy.einsum(
                            'pi,pi->p', ao[comp + 1, :, p0:p1], half)

                # second-order kernel acting on the response density
                for comp, rho1 in enumerate(rho1_xyz):
                    wv = _gga_weights(rho, rho1, weight, vxc, fxc)
                    wv[0] *= .5
                    aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                    veff[comp] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # first-order potential with differentiated basis functions
                wv = numpy.empty_like(rho)
                wv[0] = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                for comp in range(3):
                    veff[comp, p0:p1] -= lib.dot(
                        ao[comp + 1, :, p0:p1].T.copy(), aow)

                for comp, rows in enumerate(deriv_rows):
                    aow = numpy.einsum('npi,np->pi',
                                       ao[list(rows[1:]), :, p0:p1], wv[1:4])
                    veff[comp, p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        veff = veff + veff.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao + veff)

    if chkfile is None:
        return h1aos
    return chkfile
Esempio n. 29
0
def general(mydf, mo_coeffs, kpts=None, compact=True):
    """Return MO-basis two-electron integrals for one quartet of k-points.

    The result starts from ``pwdf_ao2mo.general(mydf, mo_coeffs, kptijkl,
    compact)``; the contributions accumulated over ``mydf.sr_loop`` are then
    added to it in one of three branches selected by the k-points.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called when
            ``mydf._cderi`` is ``None``.
        mo_coeffs: either a single 2D array (used for all four indices) or a
            sequence of four coefficient arrays.
        kpts: k-points, normalised through ``_format_kpts``.
        compact: forwarded to ``pwdf_ao2mo.general`` and, in the real
            gamma-point branch, to ``_conc_mos``.

    Returns:
        The accumulated ``eri_mo`` array.
    """
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    eri_mo = pwdf_ao2mo.general(mydf, mo_coeffs, kptijkl, compact)

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        if sym:
            eri_mo *= .5  # because we'll do +cc later

        ijR = klR = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               j3cR, klR, klmosym, mokl, klslice, False)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            if not sym:
                # Without the (ij)<->(kl) symmetry the swapped product has to
                # be accumulated explicitly.
                ijR, klR = _dtrans(j3cR, ijR, ijmosym, moij, ijslice,
                                   LpqR, klR, klmosym, mokl, klslice, False)
                lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = j3cR = j3cI = None
        if sym:
            eri_mo = lib.transpose_sum(eri_mo, inplace=True)
        return eri_mo

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex (an alias of the builtin complex) was removed in
        # NumPy 1.24; complex128 is the dtype it resolved to.
        eri_lk = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            bufL = LpqR+LpqI*1j
            bufj = j3cR+j3cI*1j
            zij, zlk = _ztrans(bufL, zij, moij, ijslice,
                               bufj, zlk, molk, lkslice, False)
            lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            if not sym:
                zij, zlk = _ztrans(bufj, zij, moij, ijslice,
                                   bufL, zlk, molk, lkslice, False)
                lib.dot(zij.T, zlk.conj(), 1, eri_lk, 1)
            LpqR = LpqI = j3cR = j3cI = bufL = bufj = None
        if sym:
            eri_lk += lib.transpose(eri_lk).conj()

        # eri_lk was built with the last two MO indices in (l, k) order;
        # swap them back to (k, l) before adding to eri_mo.
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_lk = lib.transpose(eri_lk.reshape(-1,nmol,nmok), axes=(0,2,1))
        eri_mo += eri_lk.reshape(nij_pair,nlk_pair)
        return eri_mo

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # Two sr_loop generators run side by side, so each gets half the budget.
        max_memory *= .5

        zij = zkl = None
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               jrsR+jrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            zij, zkl = _ztrans(jpqR+jpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None
        return eri_mo
Esempio n. 30
0
def update_amps(cc, t1, t2, eris, max_memory=2000):
    '''Compute updated t1 and t2 amplitudes from the current ones.

    Args:
        cc : object providing ``stdout``, ``verbose`` (used for logging) and
            the method ``add_wvvVV_`` (called once near the end).
        t1 : 2D array; its shape is unpacked as (nocc, nvir).
        t2 : array indexed below as t2[i,j,a,b]; the final result is built
            with shape (nocc,nocc,nvir,nvir).
        eris : integral container.  The attributes read here are ``fock``,
            ``oooo``, ``ooov``, ``ovoo``, ``ovov``, ``oovv`` and ``ovvv``.
            ``ovvv`` is passed through ``_ccsd.unpack_tril`` and so is
            presumably stored with the last two indices lower-triangular
            packed -- confirm against the code that builds eris.
        max_memory : memory budget; ``lib.current_memory()[0]`` is subtracted
            from it before the block size is derived (units presumably MB --
            confirm).

    Returns:
        (t1new, t2new), both already divided by the orbital-energy
        denominators built from the diagonal of ``eris.fock``.

    The occupied index is processed in blocks [p0:p1) of at most ``blksize``
    so that only a slice of each integral class is held in memory at a time.
    Lines starting with ``#:`` give the einsum expression that the following
    matrix-multiplication code implements.
    '''
    # NOTE: time.clock() was removed in Python 3.8; this line needs a
    # replacement clock to run on newer interpreters.
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

#** make_inter_F
    # fov/foo/fvv start from the corresponding Fock blocks (foo and fvv with
    # their diagonals zeroed) and are accumulated into throughout the loop.
    fov = fock[:nocc,nocc:].copy()

    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij',  -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time0)

    # Derive the occupied-block size from the remaining memory budget;
    # BLKMIN is the lower bound on the block size.
    unit = _memory_usage_inloop(nocc, nvir)*1e6/8
    max_memory = max_memory - lib.current_memory()[0]
    blksize = max(BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks',
               blksize, nocc, int((nocc+blksize-1)//blksize))

    for p0, p1 in prange(0, nocc, blksize):
# ==== read eris.ovvv ====
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1-p0)*nvir,-1))
        eris_ovvv = eris_ovvv.reshape(p1-p0,nvir,nvir,nvir)

        fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab',  -t1[p0:p1], eris_ovvv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kdcb->ijbk', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,ijbk->ijba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = _cp(eris_ovvv.transpose(2,1,0,3).reshape(nvir*nvir,-1))
        tmp = numpy.empty((nocc,nocc,p1-p0,nvir))
        taubuf = numpy.empty((blksize,nocc,nvir,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1-j0])
            lib.dot(tau.reshape(-1,nvir*nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
        tmp = _cp(tmp.transpose(0,1,3,2).reshape(-1,p1-p0))
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

    #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
        #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = _cp(eris.ovoo[p0:p1].transpose(2,0,1,3))
        wOVov = lib.dot(tmp.reshape(-1,nocc), t1, -1)
        tmp = None
        wOVov = wOVov.reshape(nocc,p1-p0,nvir,nvir)
        #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wOVov.reshape(nocc,-1), 1)
        t2new[p0:p1] += wOVov.transpose(1,0,2,3)

        eris_ooov = _cp(eris.ooov[p0:p1])
        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(_cp(eris_ooov.transpose(0,1,3,2).reshape(-1,nocc)), t1)
        woVoV = woVoV.reshape(p1-p0,nocc,nvir,nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir,-1), -1,
                    woVoV[i].reshape(nocc,-1), 1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        # theta is kept after this loop and reused by the ovov section below.
        theta = numpy.empty((p1-p0,nocc,nvir,nvir))
        for i in range(p1-p0):
            theta[i] = t2[p0+i].transpose(0,2,1) * 2
            theta[i] -= t2[p0+i]
            lib.dot(_cp(theta[i].transpose(0,2,1).reshape(nocc,-1)),
                    eris_ovvv[i].reshape(-1,nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

# ==== read eris.ovov ====
        eris_ovov = _cp(eris.ovov[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(_cp(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1)),
                theta.reshape(-1,nvir), -1, t1new, 1)
        eris_ooov = None

    #: wOVov += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        theta = _cp(theta.transpose(0,3,1,2))
        wOVov = _cp(wOVov.transpose(0,3,1,2))
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov)).reshape(nocc,nvir,-1,nvir)
        eris_OvoV = _cp(eris_OVov.transpose(0,3,2,1))
        wOVov += eris_OVov
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = t2[j0:j1].transpose(0,2,1,3).copy()
            #: wOVov[j0:j1] -= .5 * numpy.einsum('iack,jkbc->jbai', eris_ovov, t2)
            lib.dot(t2iajb.reshape(-1,nov), eris_OvoV.reshape(nov,-1),
                    -.5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            # The t2iajb array is modified in place to become tau; the old
            # name is dropped so only one copy stays referenced.
            tau, t2iajb = t2iajb, None
            for i in range(j1-j0):
                tau[i] *= 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.dot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                    .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The same array is reused once more, now as the output buffer of
            # the product below.
            tmp, tau = tau, None
            lib.dot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta.reshape(-1,nov),
                    1, tmp.reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
            tmp = None
            #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wOVov = eris_OvoV = eris_OVov = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1)
            tau += t2[p0+i]
            theta = tau.transpose(0,2,1) * 2
            theta -= tau
            lib.dot(_cp(eris_ovov[i].transpose(1,2,0)).reshape(nocc,-1),
                    theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.dot(theta.reshape(-1,nvir).T,
                    eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = None

# ==== read eris.oovv ====
        eris_oovv = _cp(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiab', -t1, tmp)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            tmp = lib.dot(t1, eris_oovv[j].reshape(-1,nvir).T)
            tmp = _cp(tmp.reshape(nocc,nocc,nvir).transpose(0,2,1))
            t2new[p0+j] += lib.dot(tmp.reshape(-1,nocc), t1,
                                   -1).reshape(nocc,nvir,nvir).transpose(0,2,1)
            lib.dot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            lib.dot(tmp.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1,3,0,2).copy()
        eris_oVOv = _cp(eris_ovov.transpose(0,3,2,1))
        eris_oOvV = _cp(eris_ovov.transpose(0,2,1,3))
        #==== mem usage blksize*(nocc*nvir**2*4)

        taubuf = numpy.empty((blksize,nocc,nvir,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1-j0])
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # The last argument (j0*nocc) is presumably the offset into the
            # output matrix selecting the j0:j1 slice -- confirm against
            # _dgemm's signature.
            lib.numpy_helper._dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                                    eris_oOvV.reshape(-1,nvir*nvir),
                                    tau.reshape(-1,nvir*nvir),
                                    woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1,
                                    0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.dot(_cp(tau.transpose(0,3,1,2).reshape(-1,nov)),
                    eris_oVOv.reshape(-1,nov).T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=taubuf[:p1-p0])
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_oovv = eris_ovov = eris_oVOv = eris_oOvV = taubuf = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = _cp(t2[p0:p1].transpose(0,2,1,3))
        t2ibja = _cp(t2[p0:p1].transpose(0,3,1,2))
        tmp = numpy.empty((blksize,nvir,nocc,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                    t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                    t2iajb.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        t2ibja = t2iajb = woVoV = tmp = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
# ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the i >= j occupied pairs are kept, in row-major lower-triangular
    # order (ij = i*(i+1)/2 + j); the full array is rebuilt further below.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij]  = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    # add_wvvVV_ receives the packed array; presumably it adds the vvvv
    # contribution into t2new_tril in place (its return value is not used).
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, max_memory)
    time1 = log.timer_debug1('vvvv', *time1)

    # Divide each packed (i,j) block by eia[i][a] + eia[j][b].
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    p0 = 0
    for i in range(nocc):
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack to the full array: the (j,i) block is the transpose of (i,j).
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

#** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new /= eia
#** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
Esempio n. 31
0
def update_amps(mycc, t1, t2, eris):
    '''Compute updated t1 and t2 amplitudes from the current ones.

    Args:
        mycc : object providing ``stdout``, ``verbose``, ``max_memory``,
            ``direct`` and the method ``add_wvvVV_``.
        t1 : 2D array; its shape is unpacked as (nocc, nvir).
        t2 : array indexed below as t2[i,j,a,b].
        eris : integral container.  The attributes read here are ``fock``,
            ``oooo``, ``ooov``, ``ovoo``, ``ovov``, ``oovv`` and ``ovvv``.
            ``ovvv`` is read as (occ, vir, nvir*(nvir+1)//2) and unpacked
            with ``lib.unpack_tril``.

    Returns:
        (t1new, t2new).  Both are views into one flat array (``t1t2new``), so
        they share a single allocation.

    The occupied index is processed in blocks [p0:p1); within each block the
    ovvv integrals are further read in virtual blocks [q0:q1) with the next
    block fetched in the background.  Five preallocated scratch buffers
    (buf1..buf5) are reinterpreted with ``numpy.ndarray(..., buffer=...)``
    under different names, so the order of statements matters: writing to one
    of those names overwrites whatever previously lived in the same buffer.
    Lines starting with ``#:`` give the einsum expression that the following
    matrix-multiplication code implements.
    '''
    # NOTE: time.clock() was removed in Python 3.8; this line needs a
    # replacement clock to run on newer interpreters.
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock

    # One flat allocation holds both results; t1new and t2new are views.
    t1t2new = numpy.zeros((nov+nov**2))
    t1new = t1t2new[:nov].reshape(t1.shape)
    t2new = t1t2new[nov:].reshape(t2.shape)
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    # Scatter the packed (i >= j) blocks into rows i*nocc+j of t2new.
    idxo = numpy.tril_indices(nocc)
    lib.takebak_2d(t2new.reshape(nocc**2,nvir**2), t2new_tril.reshape(-1,nvir**2),
                   idxo[0]*nocc+idxo[1], numpy.arange(nvir**2))
    # Halve the i == j blocks; presumably this compensates for the final
    # symmetrization, which adds each diagonal block to its own transpose.
    idxo = numpy.arange(nocc)
    t2new[idxo,idxo] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

#** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij',  -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc**2,nocc**2), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,nocc**2)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    # Block sizes: blksize for the occupied index, blknvir for the virtual
    # index of the ovvv read.  At least 2000 is always budgeted, even if the
    # remaining memory reported is lower.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB,  nocc,nvir = %d,%d  blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2
    # Read helpers run in the background ("prefect" is the existing spelling
    # of these local names).  prefect_ovvv loads the virtual block that
    # FOLLOWS [q0:q1) and does nothing when [q0:q1) is the last block.
    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]
    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]
    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    # Five equally sized scratch rows, reused throughout the loop body.
    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
    #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            # First block is read synchronously; afterwards the buffer filled
            # by the previous background read becomes the current one.
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                readbuf, prefetchbuf = prefetchbuf, readbuf
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            # NOTE(review): no beta argument is passed to ddot here, whereas
            # the accumulating calls elsewhere pass a trailing 1.  wooVV lives
            # in an uninitialized buffer, so the first q-block has to
            # overwrite it; but if ddot overwrites by default, only the last
            # q-block's contribution would survive when blknvir < nvir --
            # confirm.
            tmp = t1[:,q0:q1].copy()
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1))

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            # theta reuses tmp's storage (buf1); tmp's previous content has
            # already been copied into wOoVv above.
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        # NOTE(review): handler is still None here if the q-loop body never
        # ran (e.g. nvir == 0) -- confirm that case cannot occur.
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        # Start loading oovv in the background while the ooov work proceeds.
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        # Wait for oovv, then start loading ovov in the background.
        handler.join()
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        # eris_ovov must be fully loaded before its first use below.
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            # vov is written into tau's storage; tau is not needed again in
            # this iteration.
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = vov = None

    #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
    #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1),
                     tmp.reshape(-1,nvir), -1, t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

    #: wOVov += eris.ovov
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        # wOoVv (buf3) is transposed into buf5, which frees buf3 to receive
        # eris_OVov on the following lines.
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i]  = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # tau's storage is reused as the output of this product.
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        # wooVV (buf4) is first copied in transposed form to buf1 so that the
        # final layout can be written back into buf4 as woVoV.
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov), out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1), out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # The last argument (j0*nocc) is presumably the offset into the
            # output matrix selecting the j0:j1 slice -- confirm against
            # _dgemm's signature.
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir), axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                 .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,nocc*nvir**2),-1,
             t2new.reshape(nocc,nocc*nvir**2), 1)

    # Finish t1: add the fvv/foo terms, then divide by the orbital-energy
    # differences taken from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Symmetrize and divide in place over the i >= j pairs, mirroring each
    # finished block into (j,i).
    # NOTE: ij is incremented below but never read in this function.
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i]  = t2new[i,j].T
            ij += 1

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
Esempio n. 32
0
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Transform spin-resolved 2-particle density intermediates from MO to AO.

    The work is done in three passes, each streaming blocks through a
    temporary HDF5 file (``fswap``) so that only one block is in memory:

    1. the virtual-virtual "bra" blocks (``v_aa``, ``v_ba``, ``v_bb``),
    2. the blocks whose "bra" pair has at least one occupied index
       (``o_aa``, ``o_ab``, ``o_bb``),
    3. the remaining pair of indices, followed by symmetrization
       ``dm2 + dm2.T`` over the two packed AO-pair indices.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        d2: sequence of eight 4-tuples, unpacked below in the order
            ovov, vvvv, oooo, oovv, ovvo, vvov, ovvv, ooov.  Each tuple
            holds four spin blocks (lower/upper case in the local names;
            presumably alpha/beta -- confirm against the caller).
        mo_coeff: pair of 2D MO coefficient arrays; ``mo_coeff[0]`` is used
            with the lower-case blocks and ``mo_coeff[1]`` with the
            upper-case ones.
        fsave: optional container exposing ``create_dataset``.  When None a
            ``lib.H5TmpFile`` is created and the results are read back into
            memory before returning.

    Returns:
        If ``fsave`` was None: a tuple with the contents of the datasets
        ``'dm2aa+ab'`` and ``'dm2bb+ab'`` (each created with shape
        ``(nao_pair, nao_pair)``).  Otherwise ``fsave`` itself, holding
        those two datasets.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # NOTE(review): time.clock() was removed in Python 3.8; kept here because
    # the matching clock used inside log.timer_debug1 is not visible.
    time1 = time.clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dovOV, dOVov, dOVOV = d2[0]
    dvvvv, dvvVV, dVVvv, dVVVV = d2[1]
    doooo, dooOO, dOOoo, dOOOO = d2[2]
    doovv, dooVV, dOOvv, dOOVV = d2[3]
    dovvo, dovVO, dOVvo, dOVVO = d2[4]
    dvvov, dvvOV, dVVov, dVVOV = d2[5]
    dovvv, dovVV, dOVvv, dOVVV = d2[6]
    dooov, dooOV, dOOov, dOOOV = d2[7]
    # The C driver below receives raw pointers, so fix the memory layout.
    mo_a = numpy.asarray(mo_coeff[0], order='F')
    mo_b = numpy.asarray(mo_coeff[1], order='F')

    nocca, nvira, noccb, nvirb = dovOV.shape
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    # Sizes of the packed lower-triangular pair indices.
    nao_pair = nao * (nao+1) // 2
    nvira_pair = nvira * (nvira+1) //2
    nvirb_pair = nvirb * (nvirb+1) //2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()
    def _trans(vin, mo_coeff, orbs_slice, out=None):
        # Transform the trailing two MO indices of every row of ``vin`` with
        # ``mo_coeff`` (restricted to ``orbs_slice``) into ``nao_pair``
        # columns of ``out``.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow,nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int*4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # ---- pass 1: virtual-virtual blocks ----
    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize_a = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize_a = min(nvira_pair, max(ccsd.BLKMIN, blksize_a))
    chunks_a = (int(min(nao_pair, 4e8/blksize_a)), blksize_a)
    v_aa = fswap.create_dataset('v_aa', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        v_aa[:,p0:p1] = _trans(lib.unpack_tril(dvvvv[p0:p1]*.25), mo_a,
                               (nocca,nmoa,nocca,nmoa)).T

    v_ba = fswap.create_dataset('v_ab', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    dvvOP = fswap.create_dataset('dvvOP', (nvira_pair,noccb,nmob), 'f8',
                                 chunks=(int(min(blksize_a,4e8/nmob)),1,nmob))
    # Gather, for each occupied index i of the second spin, the OO and OV
    # blocks symmetrized over the two virtual indices and packed to tril.
    for i in range(noccb):
        buf1 = numpy.empty((nmob,nvira,nvira))
        buf1[:noccb] = dOOvv[i] * .5
        buf1[noccb:] = dOVvv[i]
        buf1 = buf1.transpose(1,2,0) + buf1.transpose(2,1,0)
        dvvOP[:,i] = buf1[numpy.tril_indices(nvira)]
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(dvvVV[p0:p1] * .5)
        buf1[:,:noccb,:] = dvvOP[p0:p1] * .5
        v_ba[:,p0:p1] = _trans(buf1, mo_b, (0,nmob,0,nmob)).T
    dvvOO = dvvOV = None

    blksize_b = int(max_memory*.9e6/8/(nao_pair+nmob**2))
    blksize_b = min(nvirb_pair, max(ccsd.BLKMIN, blksize_b))
    chunks_b = (int(min(nao_pair, 4e8/blksize_b)), blksize_b)
    v_bb = fswap.create_dataset('v_bb', (nao_pair,nvirb_pair), 'f8',
                                chunks=chunks_b)
    for p0, p1 in lib.prange(0, nvirb_pair, blksize_b):
        v_bb[:,p0:p1] = _trans(lib.unpack_tril(dVVVV[p0:p1]*.25), mo_b,
                               (noccb,nmob,noccb,nmob)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

# transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    o_aa = fswap.create_dataset('o_aa', (nmoa,nocca,nao_pair), 'f8', chunks=(nocca,nocca,blksize))
    o_ab = fswap.create_dataset('o_ab', (nmoa,nocca,nao_pair), 'f8', chunks=(nocca,nocca,blksize))
    o_bb = fswap.create_dataset('o_bb', (nmob,noccb,nao_pair), 'f8', chunks=(noccb,noccb,blksize))
    # Rows [0:nocc] of the o_* datasets: both leading indices occupied.
    buf1 = numpy.zeros((nocca,nocca,nmoa,nmoa))
    buf1[:,:,:nocca,:nocca] = _cp(doooo) * .25
    buf1[:,:,nocca:,nocca:] = _cp(doovv) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_a, (0,nmoa,0,nmoa))
    o_aa[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((nocca,nocca,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dooOO) * .5
    buf1[:,:,:noccb,noccb:] = _cp(dooOV)
    buf1[:,:,noccb:,noccb:] = _cp(dooVV) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_b, (0,nmob,0,nmob))
    o_ab[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((noccb,noccb,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dOOOO) * .25
    buf1[:,:,noccb:,noccb:] = _cp(dOOVV) * .5
    buf1 = _trans(buf1.reshape(noccb**2,-1), mo_b, (0,nmob,0,nmob))
    o_bb[:noccb] = buf1.reshape(noccb,noccb,nao_pair)

    # Rows [nocc:nmo] of the o_* datasets: leading index virtual, processed
    # in slabs of nocc virtual orbitals at a time.
    dovoo = numpy.asarray(dooov).transpose(2,3,0,1)
    dovOO = numpy.asarray(dOOov).transpose(2,3,0,1)
    dOVOO = numpy.asarray(dOOOV).transpose(2,3,0,1)
    for p0, p1 in lib.prange(nocca, nmoa, nocca):
        buf1 = numpy.zeros((nocca,p1-p0,nmoa,nmoa))
        buf1[:,:,:nocca,:nocca] = dovoo[:,p0-nocca:p1-nocca]
        buf1[:,:,nocca:,:nocca] = dovvo[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,:nocca,nocca:] = dovov[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,nocca:,nocca:] = dovvv[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        o_aa[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

        buf1 = numpy.zeros((nocca,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dovOO[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,:noccb] = dovVO[:,p0-nocca:p1-nocca]
        buf1[:,:,:noccb,noccb:] = dovOV[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,noccb:] = dovVV[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_ab[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

    for p0, p1 in lib.prange(noccb, nmob, noccb):
        buf1 = numpy.zeros((noccb,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dOVOO[:,p0-noccb:p1-noccb]
        buf1[:,:,noccb:,:noccb] = dOVVO[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,:noccb,noccb:] = dOVOV[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,noccb:,noccb:] = dOVVV[:,p0-noccb:p1-noccb]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*noccb,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_bb[p0:p1] = buf1.reshape(p1-p0,noccb,nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

# transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    dm2a = fsave.create_dataset('dm2aa+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    dm2b = fsave.create_dataset('dm2bb+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_aa[p0:p1]))
        buf1[:,:,:nocca] = o_aa[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        if p0 > 0:
            # Fold this row block into the upper-right strip written by the
            # earlier iterations, then mirror the sum back into buf2.
            buf1 = _cp(dm2a[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2a[:p0,p0:p1] = buf1
        # Diagonal block: symmetrize in place.
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2a[p0:p1] = buf2
        buf1 = buf2 = None

    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(_cp(v_bb[p0:p1]))
        buf1[:,:,:noccb] = o_bb[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        if p0 > 0:
            buf1 = _cp(dm2b[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2b[:p0,p0:p1] = buf1
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2b[p0:p1] = buf2
        buf1 = buf2 = None

    # Mixed-spin contribution: added to the columns of dm2a (transposed)
    # and to the rows of dm2b.
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_ba[p0:p1]))
        buf1[:,:,:nocca] = o_ab[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        dm2a[:,p0:p1] = dm2a[:,p0:p1] + buf2.T
        dm2b[p0:p1] = dm2b[p0:p1] + buf2
        buf1 = buf2 = None

    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the whole
        # dataset on every h5py version.
        return (fsave['dm2aa+ab'][()], fsave['dm2bb+ab'][()])
    else:
        return fsave
Esempio n. 33
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Accumulate the ``Ioo``, ``Ivv``, ``Ivo`` and ``Xvo`` intermediates.

    The four matrices are built by contracting two-electron integrals
    (``eris``) with the one- and two-particle density intermediates
    (``d1``, ``d2``).  Large arrays are processed in blocks and shuffled
    through a temporary HDF5 file to bound memory use.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        t1: 2D array; only its shape ``(nocc, nvir)`` is read directly here.
        t2, l1, l2: forwarded to the ``ccsd_rdm`` helpers when ``d1`` or
            ``d2`` is not supplied.
        eris: integral container (Chemist's notation); built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: tuple ``(doo, dov, dvo, dvv)``; computed when None.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; when None the blocks are generated into a temporary
            HDF5 file that is emptied and closed before returning.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)``.  ``Ioo``, ``Ivv`` and ``Ivo`` are
        returned with their sign flipped and ``Xvo`` has the (sign-flipped)
        ``Ivo`` added to it.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Scratch file: 'e_vvov' / 'c_vvov' receive the transposed integral and
    # density blocks written in pass 1 and read back in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T,
        d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # d_ovov[i,a,j,b] = dovov[i,a,j,b] + dovvo[i,a,b,j], loaded in small
    # slabs, then symmetrized over the two compound (ov) indices.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T,
        _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Keep the symmetrized block on disk (ovvo order) for use inside pass 1.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # ---- pass 1: blocks over the leading occupied index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T, eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # ---- pass 2: blocks over packed virtual pairs ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # This loop is blocked over the virtual index, so report nvir.
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of virtual indices p0 and p1 in the packed-tril layout.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Halve the rows that correspond to diagonal pairs (i, i).
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # Drop every block-sized temporary before the next iteration
        # allocates its own.
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # The density blocks were generated locally: empty and close the file.
        for key in fd2intermediate.keys():
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 34
0
def kernel_ms0(fci,
               h1e,
               eri,
               norb,
               nelec,
               ci0=None,
               link_index=None,
               tol=None,
               lindep=None,
               max_cycle=None,
               max_space=None,
               nroots=None,
               davidson_only=None,
               pspace_size=None,
               max_memory=None,
               verbose=None,
               ecore=0,
               **kwargs):
    """Solve for the lowest eigenpairs with a square (na, na) CI vector.

    A small "pspace" Hamiltonian is diagonalized first.  If it spans the
    whole space (and neither ``ci0`` nor ``davidson_only`` is given) its
    eigenvectors are returned directly, provided they pass ``_check_``;
    otherwise the iterative solver ``fci.eig`` is run.

    Args:
        fci: solver object; supplies defaults (``nroots``, ``conv_tol``,
            ``lindep``, ...) and the methods ``make_hdiag``, ``pspace``,
            ``eig``, ``make_precond``, ``absorb_h1e`` and ``contract_2e``.
            ``fci.spin`` must be None or 0.
        h1e, eri: one- and two-electron integrals (made C-contiguous here).
        norb, nelec: forwarded to the ``fci`` methods.
        ci0: optional initial guess; a single array of size ``na*na`` or an
            iterable of such arrays.
        link_index: forwarded to ``_unpack``.
        tol, lindep, max_cycle, max_space, nroots, davidson_only,
        pspace_size, max_memory, verbose: override the corresponding
            attribute of ``fci`` when not None.
        ecore: constant added to the returned energies.
        **kwargs: forwarded to ``fci.eig``.

    Returns:
        ``(e, c)``: energy and CI vector for a single root, or the energies
        and a list of CI vectors when ``nroots > 1``.

    Raises:
        ValueError: propagated from ``_check_`` on the final result.
    """
    if nroots is None: nroots = fci.nroots
    if davidson_only is None: davidson_only = fci.davidson_only
    if pspace_size is None: pspace_size = fci.pspace_size

    assert (fci.spin is None or fci.spin == 0)

    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)

    addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag,
                          max(pspace_size, nroots))
    if pspace_size > 0:
        pw, pv = fci.eig(h0)
    else:
        pw = pv = None

    if pspace_size >= na * na and ci0 is None and not davidson_only:
        # The degenerated wfn can break symmetry.  The davidson iteration with proper
        # initial guess doesn't have this issue
        if na * na == 1:
            return pw[0] + ecore, pv[:, 0].reshape(1, 1)
        elif nroots > 1:
            civec = numpy.empty((nroots, na * na))
            civec[:, addr] = pv[:, :nroots].T
            civec = civec.reshape(nroots, na, na)
            try:
                return pw[:nroots] + ecore, [_check_(ci) for ci in civec]
            except ValueError:
                # Deliberate fall-through to the iterative solver below.
                pass
        elif abs(pw[0] - pw[1]) > 1e-12:
            civec = numpy.empty((na * na))
            civec[addr] = pv[:, 0]
            civec = civec.reshape(na, na)
            civec = lib.transpose_sum(civec) * .5
            # direct diagonalization may lead to triplet ground state
            ##TODO: optimize initial guess.  Using pspace vector as initial guess may have
            ## spin problems.  The 'ground state' of pspace vector may have different spin
            ## state to the true ground state.
            try:
                return pw[0] + ecore, _check_(civec.reshape(na, na))
            except ValueError:
                # Deliberate fall-through to the iterative solver below.
                pass

    precond = fci.make_precond(hdiag, pw, pv, addr)

    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)

    def hop(c):
        # Matrix-vector product on the flattened (na*na,) vector.
        hc = fci.contract_2e(h2e, c.reshape(na, na), norb, nelec, link_index)
        return hc.ravel()

    #TODO: check spin of initial guess

    if ci0 is None:
        if hasattr(fci, 'get_init_guess'):
            ci0 = fci.get_init_guess(norb, nelec, nroots, hdiag)
        else:
            ci0 = []
            for i in range(nroots):
                # The shape must be a tuple: numpy.zeros(na, na) would treat
                # the second ``na`` as the dtype and raise TypeError.
                x = numpy.zeros((na, na))
                if addr[i] == 0:
                    x[0, 0] = 1
                else:
                    # Symmetric pair of unit entries, normalized to 1.
                    addra = addr[i] // na
                    addrb = addr[i] % na
                    x[addra, addrb] = x[addrb, addra] = numpy.sqrt(.5)
                ci0.append(x.ravel())
    else:
        if isinstance(ci0, numpy.ndarray) and ci0.size == na * na:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    if tol is None: tol = fci.conv_tol
    if lindep is None: lindep = fci.lindep
    if max_cycle is None: max_cycle = fci.max_cycle
    if max_space is None: max_space = fci.max_space
    if max_memory is None: max_memory = fci.max_memory
    if verbose is None: verbose = logger.Logger(fci.stdout, fci.verbose)
    #e, c = lib.davidson(hop, ci0, precond, tol=fci.conv_tol, lindep=fci.lindep)
    e, c = fci.eig(hop,
                   ci0,
                   precond,
                   tol=tol,
                   lindep=lindep,
                   max_cycle=max_cycle,
                   max_space=max_space,
                   nroots=nroots,
                   max_memory=max_memory,
                   verbose=verbose,
                   follow_state=True,
                   **kwargs)
    if nroots > 1:
        return e + ecore, [_check_(ci.reshape(na, na)) for ci in c]
    else:
        return e + ecore, _check_(c.reshape(na, na))
Esempio n. 35
0
def _rdm2_mo2ao(mycc, d2, dm1, mo_coeff, fsave=None):
    """Transform 2-particle density intermediates from the MO to the AO basis.

    Three passes, each streaming through a temporary HDF5 file:

    1. blocks whose leading index is occupied (with ``dm1`` folded in),
    2. the packed virtual-virtual block ``dvvvv``,
    3. transformation of the remaining index pair followed by
       symmetrization over the two packed AO-pair indices; columns that
       correspond to diagonal AO pairs are scaled by 0.5.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``.
        dm1: array added into / subtracted from the (nmo, nmo) slices of
            the pass-1 buffer.
        mo_coeff: 2D MO coefficient array of shape ``(nao, nmo)``.
        fsave: optional HDF5 file/group.  When None a temporary file is
            created and the result is loaded into memory.

    Returns:
        If ``fsave`` was None: a ``(nao_pair, nao_pair)`` array assembled
        from the column-blocked datasets in group ``'dm2'``.  Otherwise
        ``fsave`` itself, containing that group.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if fsave is None:
        _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fsave = h5py.File(_dm2file.name, 'w')
    else:
        _dm2file = None
    # NOTE(review): time.clock() was removed in Python 3.8; kept here because
    # the matching clock used inside log.timer_debug1 is not visible.
    time1 = time.clock(), time.time()
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2
    nocc, nvir = dovov.shape[:2]
    nov = nocc * nvir
    nao, nmo = mo_coeff.shape
    # Sizes of the packed lower-triangular pair indices.
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2
    # The C driver below receives raw pointers, so fix the memory layout.
    mo_coeff = numpy.asarray(mo_coeff, order='F')

    def _trans(vin, orbs_slice, out=None):
        # Transform the trailing two MO indices of every row of ``vin``
        # (restricted to ``orbs_slice``) into ``nao_pair`` columns of ``out``.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
        pao_loc = ctypes.POINTER(ctypes.c_void_p)()
        fdrv(_ccsd.libcc.AO2MOtranse2_nr_s1, _ccsd.libcc.CCmmm_transpose_sum,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nrow),
             ctypes.c_int(nao), (ctypes.c_int * 4)(*orbs_slice), pao_loc,
             ctypes.c_int(0))
        return out

    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open the scratch file explicitly for writing, like the other
    # temporary HDF5 files in this module.
    fswap = h5py.File(_tmpfile.name, 'w')
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(
        1, int(max_memory * 1e6 / 8 / (nmo * nao_pair + nmo**3 + nvir**3)))
    iobuflen = int(256e6 / 8 / (blksize * nmo))
    log.debug1('_rdm2_mo2ao pass 1: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    fswap.create_group('o')  # for h5py old version
    pool1 = numpy.empty((blksize, nmo, nmo, nmo))
    pool2 = numpy.empty((blksize, nmo, nao_pair))
    bufd_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        # Assemble the full (nmo, nmo, nmo) tensor for each occupied index
        # of this slab from the individual occupied/virtual blocks.
        buf1 = pool1[:p1 - p0]
        buf1[:, :nocc, :nocc, :nocc] = doooo[p0:p1]
        buf1[:, :nocc, :nocc, nocc:] = dooov[p0:p1]
        buf1[:, :nocc, nocc:, :nocc] = 0
        buf1[:, :nocc, nocc:, nocc:] = doovv[p0:p1]
        buf1[:, nocc:, :nocc, :nocc] = 0
        buf1[:, nocc:, :nocc, nocc:] = dovov[p0:p1]
        buf1[:, nocc:, nocc:, :nocc] = dovvo[p0:p1]
        d_ovvv = bufd_ovvv[:p1 - p0]
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        buf1[:, nocc:, nocc:, nocc:] = d_ovvv
        for i in range(p0, p1):
            buf1[i - p0, i, :, :] += dm1
            buf1[i - p0, :, :, i] -= dm1 * .5
        buf2 = pool2[:p1 - p0].reshape(-1, nao_pair)
        _trans(buf1.reshape(-1, nmo**2), (0, nmo, 0, nmo), buf2)
        ao2mo.outcore._transpose_to_h5g(fswap, 'o/%d' % istep, buf2, iobuflen)
    pool1 = pool2 = bufd_ovvv = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    fswap.create_group('v')  # for h5py old version
    pool1 = numpy.empty((blksize * nvir, nao_pair))
    pool2 = numpy.empty((blksize * nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nvir_pair, blksize * nvir)):
        buf1 = _cp(dvvvv[p0:p1])
        buf2 = lib.unpack_tril(buf1, out=pool2[:p1 - p0])
        buf1 = _trans(buf2, (nocc, nmo, nocc, nmo), out=pool1[:p1 - p0])
        ao2mo.outcore._transpose_to_h5g(fswap, 'v/%d' % istep, buf1, iobuflen)
    pool1 = pool2 = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)

    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(nao, int(max_memory * 1e6 / 8 / (nao_pair + nmo**2)))
    iobuflen = int(256e6 / 8 / blksize)
    log.debug1('_rdm2_mo2ao pass 3: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    # The result is stored as column blocks: dataset str(k) holds all
    # nao_pair rows of the k-th block of columns.
    gsave = fsave.create_group('dm2')
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        gsave.create_dataset(str(istep), (nao_pair, p1 - p0), 'f8')
    # Packed-tril positions of the diagonal AO pairs (i, i).
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    pool1 = numpy.empty((blksize, nmo, nmo))
    pool2 = numpy.empty((blksize, nvir_pair))
    pool3 = numpy.empty((blksize, nvir, nvir))
    pool4 = numpy.empty((blksize, nao_pair))
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        buf1 = pool1[:p1 - p0]
        ao2mo.outcore._load_from_h5g(fswap['o'], p0, p1,
                                     buf1[:, :nocc].reshape(p1 - p0, -1))
        buf2 = ao2mo.outcore._load_from_h5g(fswap['v'], p0, p1,
                                            pool2[:p1 - p0])
        buf3 = lib.unpack_tril(buf2, out=pool3[:p1 - p0])
        buf1[:, nocc:, nocc:] = buf3
        buf1[:, nocc:, :nocc] = 0
        buf2 = _trans(buf1, (0, nmo, 0, nmo), out=pool4[:p1 - p0])
        idx = diagidx[diagidx < p1]
        if p0 > 0:
            # Fold this row block into the rows [:p0] of the current column
            # block written by earlier iterations, tile by tile, and mirror
            # the sums back into buf2.
            buf1 = _cp(gsave[str(istep)][:p0])
            for i0, i1 in prange(0, p1 - p0, BLKSIZE):
                for j0, j1 in prange(0, p0, BLKSIZE):
                    buf1[j0:j1, i0:i1] += buf2[i0:i1, j0:j1].T
                    buf2[i0:i1, j0:j1] = buf1[j0:j1, i0:i1].T
            buf1[:, idx[p0 <= idx] - p0] *= .5
            gsave[str(istep)][:p0] = buf1
        # Diagonal block: symmetrize in place.
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        buf2[:, idx] *= .5
        for ic, (i0, i1) in enumerate(prange(0, nao_pair, blksize)):
            gsave[str(ic)][p0:p1] = buf2[:, i0:i1]
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    del (fswap['o'])
    del (fswap['v'])
    fswap.close()
    _tmpfile = None
    time1 = log.timer_debug1('_rdm2_mo2ao cleanup', *time1)
    if _dm2file is not None:
        # The 'dm2' datasets span nao_pair rows and, together, nao_pair
        # columns, so the in-memory copy must be (nao_pair, nao_pair).
        dm2 = numpy.empty((nao_pair, nao_pair))
        ao2mo.outcore._load_from_h5g(fsave['dm2'], 0, nao_pair, dm2)
        fsave.close()
        _dm2file = None
        return dm2
    else:
        return fsave
Esempio n. 36
0
def make_hdiag(h1e, eri, norb, nelec):
    """Return the flattened diagonal from ``direct_spin1.make_hdiag``,
    averaged with its transpose when viewed as a square matrix.
    """
    diag = direct_spin1.make_hdiag(h1e, eri, norb, nelec)
    # The diagonal is reshaped to a square (side, side) matrix below.
    side = int(numpy.sqrt(diag.size))
    square = diag.reshape(side, side)
    # symmetrize hdiag to reduce numerical error
    averaged = lib.transpose_sum(square, inplace=True) * .5
    return averaged.ravel()
Esempio n. 37
0
def update_amps(cc, t1, t2, eris):
    """Compute one update of the singles (t1) and doubles (t2) amplitudes.

    Intermediates (``foo``, ``fvv``, ``fov``, ``woooo``, ``woVoV``,
    ``wOVov``) are assembled from the Fock matrix and the integral blocks in
    ``eris`` and contracted with the current amplitudes.  The residuals are
    finally divided by differences of the Fock diagonal elements.  The ``#:``
    comments give the einsum expression that the following ``lib.dot`` calls
    implement.

    Several large work arrays are deliberately recycled under new names
    (e.g. ``tau`` -> ``wOVov``, ``tau`` -> ``tmp``) to limit peak memory, so
    the statement order in this function must not be changed casually.

    Args:
        cc: solver object; ``cc.stdout`` and ``cc.verbose`` are used for
            logging, and ``cc.add_wvvVV_`` adds a further contribution to
            the packed t2 residual (NOTE(review): presumably the vvvv
            integral term, judging by the timer label -- confirm).
        t1: array of shape ``(nocc, nvir)``.
        t2: array indexed as ``t2[i, j, a, b]`` with shape
            ``(nocc, nocc, nvir, nvir)``.
        eris: object exposing ``fock``, ``oooo``, ``ooov``, ``oovv``,
            ``ovov`` and ``ovvv``.  ``ovvv`` is expanded with
            ``unpack_tril`` before use.

    Returns:
        Tuple ``(t1new, t2new)`` with the same shapes as ``t1`` and ``t2``.
    """
    # time.clock() was removed in Python 3.8.  Keep using it wherever it
    # still exists, so behaviour on older interpreters is unchanged, and fall
    # back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

#** make_inter_F
    fov = fock[:nocc,nocc:].copy()

    # Occupied-occupied block with its diagonal removed; the diagonal enters
    # later through the energy denominators.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    # Virtual-virtual block, again without its diagonal.
    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij',  -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    time1 = log.timer_debug1('woooo', *time0)

    # Expand ovvv to the full (nocc, nvir, nvir, nvir) array.
    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = unpack_tril(eris_ovvv.reshape(nov,-1))
    eris_ovvv = eris_ovvv.reshape(nocc,nvir,nvir,nvir)

    fvv += numpy.einsum('kc,kcba->ab', 2*t1, eris_ovvv)
    fvv += numpy.einsum('kc,kbca->ab',  -t1, eris_ovvv)

    #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov)
    #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
    woVoV = lib.dot(_cp(eris_ooov.transpose(0,1,3,2).reshape(-1,nocc)), t1)
    woVoV = woVoV.reshape(nocc,nocc,nvir,nvir)

#: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
#: tmp = numpy.einsum('ijcd,kcdb->kijb', tau, eris.ovvv)
#: t2new += numpy.einsum('ka,kijb->jiba', -t1, tmp)
    tau = make_tau(t2, t1, t1)
    tmp = numpy.empty((nocc,nocc,nocc,nvir))
    for k in range(nocc):
        tmp[k] = lib.dot(tau.reshape(-1,nvir**2),
                         eris_ovvv[k].reshape(-1,nvir)).reshape(nocc,nocc,nvir).transpose(1,0,2)
        lib.dot(t1, eris_ovvv[k].reshape(nvir,-1), -1, woVoV[k].reshape(nocc,-1), 1)
    lib.dot(tmp.reshape(nocc,-1).T, t1, -1, t2new.reshape(-1,nvir), 1)
    tmp = None

#: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
#: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
#: t2new += woVoV.transpose()
    #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo, t1)
    # Recycle the tau buffer as storage for wOVov; the first dot overwrites it.
    wOVov, tau = tau, None
    lib.dot(_cp(_cp(eris.ooov).transpose(0,2,3,1).reshape(-1,nocc)), t1,
            -1, wOVov.reshape(-1,nvir))
    #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
    lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wOVov.reshape(nocc,-1), 1)
    for i in range(nocc):
        t2new[i] += wOVov[i].transpose(0,2,1)

#: theta = t2.transpose(0,1,3,2) * 2 - t2
#: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
    theta = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        theta[i] = t2[i].transpose(0,2,1) * 2
        theta[i] -= t2[i]
        lib.dot(_cp(theta[i].transpose(0,2,1).reshape(nocc,-1)),
                eris_ovvv[i].reshape(-1,nvir), 1, t1new, 1)
    eris_ovvv = None

    eris_ovov = _cp(eris.ovov)

    for i in range(nocc):
        t2new[i] += eris_ovov[i].transpose(1,0,2) * .5

    fov += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
    fov -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

#: theta = t2.transpose(1,0,2,3) * 2 - t2
#: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
#: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
    t1new += numpy.einsum('jb,jiab->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov, theta)
    lib.dot(_cp(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1)),
            theta.reshape(-1,nvir), -1, t1new, 1)
    eris_ooov = None

#: wOVov += eris.ovov.transpose(0,1,3,2)
#: theta = t2.transpose(1,0,2,3) * 2 - t2
#: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
#: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
#: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
#: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
    wOVov = _cp(wOVov.transpose(0,3,1,2))
    eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov)).reshape(nocc,nvir,-1,nvir)
    eris_OvoV = _cp(eris_OVov.transpose(0,3,2,1))
    wOVov += eris_OVov
    t2iajb = t2.transpose(0,2,1,3).copy()
    #: wOVov[j0:j1] -= .5 * numpy.einsum('iakc,jkbc->jbai', eris_ovov, t2)
    lib.dot(t2iajb.reshape(-1,nov), eris_OvoV.reshape(nov,-1),
            -.5, wOVov.reshape(nov,-1), 1)
    # The t2iajb buffer is turned into tau in place.
    tau, t2iajb = t2iajb, None
    for i in range(nocc):
        tau[i] = tau[i]*2 - t2[i].transpose(2,0,1)
        tau[i] -= numpy.einsum('a,jb->bja', t1[i]*2, t1)
    #: wOVov += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
    lib.dot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
            .5, wOVov.reshape(nov,-1), 1)

    #theta = t2 * 2 - t2.transpose(0,1,3,2)
    #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
    # tau is no longer needed; its storage receives the product below.
    tmp, tau = tau, None
    theta = _cp(theta.transpose(0,3,1,2).reshape(nov,-1))
    lib.dot(wOVov.reshape(nov,-1), theta.T, 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,0,2)
    tmp = wOVov = eris_OvoV = eris_OVov = None

#: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
#: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
    for i in range(nocc):
        tau = numpy.einsum('a,jb->jab', t1[i]*.5, t1) + t2[i]
        theta = tau.transpose(0,2,1)*2 - tau
        lib.dot(_cp(eris_ovov[i].transpose(1,2,0)).reshape(nocc,-1),
                theta.reshape(nocc,-1).T, 1, foo, 1)
        lib.dot(theta.reshape(-1,nvir).T,
                eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
    tau = theta = None

    eris_oovv = _cp(eris.oovv)
    #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
    #:t2new += numpy.einsum('ka,jibk->jiab', -t1, tmp)
    #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
    #:t2new += numpy.einsum('ka,jibk->jiba', -t1, tmp)
    for j in range(nocc):
        tmp = lib.dot(t1, eris_oovv[j].reshape(-1,nvir).T)
        tmp = _cp(tmp.reshape(nocc,nocc,nvir).transpose(0,2,1))
        t2new[j] += lib.dot(tmp.reshape(-1,nocc), t1,
                            -1).reshape(nocc,nvir,nvir).transpose(0,2,1)
        lib.dot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
        lib.dot(tmp.reshape(-1,nocc), t1, -1, t2new[j].reshape(-1,nvir), 1)
    tmp = None

#: g2 = 2 * eris.oOVv - eris.oovv
#: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
    t1new += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
    t1new += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)

#: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
#: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
#: woVoV -= eris.oovv
#: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
#: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
#: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
    woVoV -= eris_oovv
    woVoV = woVoV.transpose(1,3,0,2).copy()
    eris_oVOv = _cp(eris_ovov.transpose(0,3,2,1))
    eris_oOvV = _cp(eris_ovov.transpose(0,2,1,3))

    tau = make_tau(t2, t1, t1)
    #: woooo += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
    lib.dot(eris_oOvV.reshape(-1,nvir**2), tau.reshape(-1,nvir**2).T,
            1, woooo.reshape(nocc**2,-1), 1)
    #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo, tau)
    lib.dot(woooo.reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
            .5, t2new.reshape(nocc*nocc,-1), 1)
    for i in range(nocc):
        tau[i] -= t2[i] * .5
    #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
    tau = _cp(tau.transpose(0,3,1,2))
    lib.dot(tau.reshape(-1,nov), eris_oVOv.reshape(-1,nov).T,
            1, woVoV.reshape(nov,-1), 1)
    # Drop the references to the integral copies that are no longer needed.
    eris_oovv = eris_ovov = eris_oOvV = None

    # Reuse tau as the output buffer and eris_oVOv as storage for the
    # reordered t2.
    tmp, tau = tau, None
    t2ibja, eris_oVOv = eris_oVOv, None
    for i in range(nocc):
        t2ibja[i] = t2[i].transpose(2,0,1)
    #: t2new += numpy.einsum('ibkc,kcja->ijab', woVoV, t2ibja)
    lib.dot(woVoV.reshape(nov,-1), t2ibja.reshape(-1,nov), 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,2,0)

    #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
    t2iajb = t2ibja
    for i in range(nocc):
        t2iajb[i] = t2[i].transpose(1,0,2)
    lib.dot(woVoV.reshape(nov,-1), t2iajb.reshape(-1,nov), 1, tmp.reshape(nov,-1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1,0,2)
    t2ibja = t2iajb = woVoV = tmp = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the i >= j pairs are kept, in packed (lower-triangular) order.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij]  = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    cc.add_wvvVV_(t1, t2, eris, t2new_tril)
    time1 = log.timer_debug1('vvvv', *time1)

    # Energy denominators eia[i, a] = e_i - e_a from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    p0 = 0
    for i in range(nocc):
        # Rows p0 .. p0+i of the packed array hold the pairs (i, j<=i).
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular storage into the full (i, j, a, b) array.
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

#** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    # eia was computed above and has not been modified since.
    t1new /= eia
#** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
Esempio n. 38
0
def kernel_ms0(fci, h1e, eri, norb, nelec, ci0=None, link_index=None,
               tol=None, lindep=None, max_cycle=None, max_space=None,
               nroots=None, davidson_only=None, pspace_size=None,
               max_memory=None, verbose=None, ecore=0, **kwargs):
    """Solve for the lowest ``nroots`` eigenpairs with an ``(na, na)`` CI vector.

    Options left as ``None`` are read from the ``fci`` solver object.  When
    the requested primary space covers the whole ``na * na`` space, no
    initial guess is supplied and ``davidson_only`` is false, the result of
    diagonalizing the primary-space Hamiltonian is returned directly,
    provided that ``_check_`` accepts the vectors.  Otherwise the iterative
    eigensolver ``fci.eig`` is run on the contraction ``fci.contract_2e``.

    Returns:
        ``(e + ecore, c)``.  For ``nroots > 1`` ``c`` is a list of
        ``(na, na)`` arrays, otherwise a single ``(na, na)`` array; every
        returned vector is passed through ``_check_``.
    """
    # Fill unspecified options from the solver object.
    if nroots is None:
        nroots = fci.nroots
    if davidson_only is None:
        davidson_only = fci.davidson_only
    if pspace_size is None:
        pspace_size = fci.pspace_size
    if max_memory is None:
        max_memory = fci.max_memory - lib.current_memory()[0]
    log = logger.new_logger(fci, verbose)

    assert (fci.spin is None or fci.spin == 0)
    assert (0 <= numpy.sum(nelec) <= norb * 2)

    link_index = _unpack(norb, nelec, link_index)
    h1e = numpy.ascontiguousarray(h1e)
    eri = numpy.ascontiguousarray(eri)
    na = link_index.shape[0]
    ndet = na * na  # size of the flattened (na, na) CI vector

    if max_memory < ndet * 6 * 8e-6:
        log.warn(
            'Not enough memory for FCI solver. '
            'The minimal requirement is %.0f MB', ndet * 60e-6)

    hdiag = fci.make_hdiag(h1e, eri, norb, nelec)
    nroots = min(hdiag.size, nroots)

    try:
        addr, h0 = fci.pspace(h1e, eri, norb, nelec, hdiag,
                              max(pspace_size, nroots))
        pw, pv = fci.eig(h0) if pspace_size > 0 else (None, None)

        solve_directly = (pspace_size >= ndet and ci0 is None
                          and not davidson_only)
        if solve_directly:
            # A degenerate wavefunction can break symmetry here; a Davidson
            # iteration with a proper initial guess does not have this issue.
            if ndet == 1:
                return pw[0] + ecore, pv[:, 0].reshape(1, 1)

            if nroots > 1:
                vectors = numpy.empty((nroots, ndet))
                vectors[:, addr] = pv[:, :nroots].T
                vectors = vectors.reshape(nroots, na, na)
                try:
                    return pw[:nroots] + ecore, [_check_(v) for v in vectors]
                except ValueError:
                    # Rejected by _check_: continue with the iterative solver.
                    pass
            elif abs(pw[0] - pw[1]) > 1e-12:
                vector = numpy.empty(ndet)
                vector[addr] = pv[:, 0]
                vector = lib.transpose_sum(vector.reshape(na, na)) * .5
                # Direct diagonalization may lead to a triplet ground state.
                # TODO: optimize the initial guess.  Using the pspace vector
                # as initial guess may have spin problems: the 'ground state'
                # of the pspace vector may have a different spin state from
                # the true ground state.
                try:
                    return pw[0] + ecore, _check_(vector.reshape(na, na))
                except ValueError:
                    pass
    except NotImplementedError:
        addr = [0]
        pw = pv = None

    precond = fci.make_precond(hdiag, pw, pv, addr)

    h2e = fci.absorb_h1e(h1e, eri, norb, nelec, .5)

    def hop(c):
        # Apply the Hamiltonian to a flattened vector.
        return fci.contract_2e(h2e, c.reshape(na, na), norb, nelec,
                               link_index).ravel()

    # TODO: check spin of initial guess

    if ci0 is None:
        if callable(getattr(fci, 'get_init_guess', None)):
            def ci0():
                return fci.get_init_guess(norb, nelec, nroots, hdiag)
        else:
            def ci0():
                # One guess per root, symmetric under transposition.
                guesses = []
                for iroot in range(nroots):
                    guess = numpy.zeros((na, na))
                    row, col = divmod(addr[iroot], na)
                    if row == col:
                        guess[row, col] = 1
                    else:
                        guess[row, col] = guess[col, row] = numpy.sqrt(.5)
                    guesses.append(guess.ravel())
                return guesses
    elif not callable(ci0):
        if isinstance(ci0, numpy.ndarray) and ci0.size == ndet:
            ci0 = [ci0.ravel()]
        else:
            ci0 = [x.ravel() for x in ci0]

    if tol is None:
        tol = fci.conv_tol
    if lindep is None:
        lindep = fci.lindep
    if max_cycle is None:
        max_cycle = fci.max_cycle
    if max_space is None:
        max_space = fci.max_space
    tol_residual = getattr(fci, 'conv_tol_residual', None)

    with lib.with_omp_threads(fci.threads):
        e, c = fci.eig(hop, ci0, precond, tol=tol, lindep=lindep,
                       max_cycle=max_cycle, max_space=max_space,
                       nroots=nroots, max_memory=max_memory, verbose=log,
                       follow_state=True, tol_residual=tol_residual,
                       **kwargs)

    if nroots > 1:
        return e + ecore, [_check_(ci.reshape(na, na)) for ci in c]
    return e + ecore, _check_(c.reshape(na, na))
Esempio n. 39
0
File: rks.py Progetto: eronca/pyscf
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    """Build, for each atom in ``atmlst``, the three-component matrix ``h1ao + veff``.

    NOTE(review): from the derivative integrals used (``ip*`` intors with
    ``comp=3``) this looks like the first-order Hamiltonian with respect to
    nuclear displacements for a restricted Kohn-Sham calculation -- confirm
    against the caller.

    Args:
        mf: mean-field object; ``mol``, ``_numint``, ``xc``, ``grids``,
            ``stdout`` and ``verbose`` are read from it.
        mo_coeff: orbital coefficients of shape ``(nao, nmo)``.
        mo_occ: occupations; columns with ``mo_occ > 0`` define the density.
        chkfile: if given, each matrix is saved under ``'scf_h1ao/<ia>'``
            instead of being collected in memory.
        atmlst: atom indices to process; defaults to all atoms of ``mf.mol``.
        verbose: a ``logger.Logger`` instance is used as is; for any other
            value a logger is created from ``mf.stdout`` and ``mf.verbose``.

    Returns:
        The list of per-atom matrices when ``chkfile`` is None, otherwise
        ``chkfile``.

    Raises:
        NotImplementedError: if the functional type is neither 'LDA' nor 'GGA'.
    """
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    # Density matrix from the occupied orbitals, with a factor of 2.
    mocc = mo_coeff[:,mo_occ>0]
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching ni.libxc below does not
    # modify mf._numint itself.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to libxc when xcfun cannot handle this functional.
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    # Fixed value passed to ni.block_loop below as its max_memory argument.
    max_memory = 4000

    # Sum of the 'ipkin' and 'ipnuc' integrals (3 components), negated.
    h1a =-(mol.intor('cint1e_ipkin_sph', comp=3) +
           mol.intor('cint1e_ipnuc_sph', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for i0, ia in enumerate(atmlst):
        # Shell range [shl0, shl1) and AO range [p0, p1) belonging to atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]

        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        # Symmetrize each of the three components.
        h1ao = h1ao + h1ao.transpose(0,2,1)

        # Restrict the first shell index to atom ia; the other three run
        # over all shells.
        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        if abs(hyb) > 1e-10:
            # Hybrid functional: both Coulomb-like (vj) and exchange-like (vk)
            # contractions are needed.
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb*.5*vk1
            veff[:,p0:p1] += vj2 - hyb*.5*vk2
        else:
            # Negligible hybrid coefficient: only the vj contractions.
            vj1, vj2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            # veff aliases vj1 here, so the update below modifies vj1 in place.
            veff = vj1
            veff[:,p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                # ao[0] holds the AO values and ao[1:] their three first
                # derivatives on this block of grid points.
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:,:,p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight*frr*rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:,:,p0:p1], weight*vrho)
                aow[:,:,p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                # Release the per-block work arrays.
                half = aow = aow1 = None

        elif xctype == 'GGA':
            def get_wv(rho, rho1, weight, vxc, fxc):
                """Return the (4, ngrid) weight array built from rho, rho1 and the xc derivatives."""
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                # Dot product of the gradient rows of rho and rho1 per grid point.
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4,ngrid))
                wv[0]  = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:]  = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # NOTE(review): XX, XY, ..., ZZ are module-level indices into
                # `ao`, presumably selecting the second-derivative components
                # -- confirm against their definition.
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:,p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1,XX,XY,XZ],:,p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2,YX,YY,YZ],:,p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3,ZX,ZY,ZZ],:,p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:,p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[2], dm0[:,p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)
                half = lib.dot(ao[3], dm0[:,p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1,:,p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2,:,p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3,:,p0:p1], half)

                # For each direction: halve wv[0] and let transpose_sum add
                # the transposed matrix to the product.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # Terms built from vrho and vgamma, acting only on the AO
                # rows p0:p1 of atom ia.
                wv = numpy.empty_like(rho)
                wv[0]  = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0,p0:p1] -= lib.dot(ao[1,:,p0:p1].T.copy(), aow)
                veff[1,p0:p1] -= lib.dot(ao[2,:,p0:p1].T.copy(), aow)
                veff[2,p0:p1] -= lib.dot(ao[3,:,p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX,XY,XZ],:,p0:p1], wv[1:4])
                veff[0,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX,YY,YZ],:,p0:p1], wv[1:4])
                veff[1,p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX,ZY,ZZ],:,p0:p1], wv[1:4])
                veff[2,p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        # Symmetrize each component, as was done for h1ao above.
        veff = veff + veff.transpose(0,2,1)

        if chkfile is None:
            h1aos.append(h1ao+veff)
        else:
            # Store the matrix of atom ia on disk rather than in memory.
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao+veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile
Esempio n. 40
0
def update_amps(cc, t1, t2, eris):
    """Compute one update of the singles (t1) and doubles (t2) amplitudes.

    The contribution of ``cc.add_wvvVV_`` is evaluated first in packed
    (i >= j) form and scattered into the lower triangle of ``t2new``, with
    the ``i == j`` blocks halved because the final symmetrization
    ``t2new + t2new.transpose(1,0,3,2)`` counts them twice.  The remaining
    intermediates (``foo``, ``fvv``, ``fov``, ``woooo``, ``woVoV``,
    ``wOVov``) are assembled from the Fock matrix and the integral blocks in
    ``eris``.  The residuals are finally divided by differences of the Fock
    diagonal elements.  The ``#:`` comments give the einsum expression that
    the following ``lib.dot`` calls implement.

    Several large work arrays are deliberately recycled under new names
    (e.g. ``tau`` -> ``wOVov``, ``tau`` -> ``tmp``) to limit peak memory, so
    the statement order in this function must not be changed casually.

    Args:
        cc: solver object; ``cc.stdout`` and ``cc.verbose`` are used for
            logging, and ``cc.add_wvvVV_`` fills the packed t2 residual
            (NOTE(review): presumably the vvvv integral term, judging by
            the timer label -- confirm).
        t1: array of shape ``(nocc, nvir)``.
        t2: array indexed as ``t2[i, j, a, b]`` with shape
            ``(nocc, nocc, nvir, nvir)``.
        eris: object exposing ``fock``, ``oooo``, ``ooov``, ``oovv``,
            ``ovov`` and ``ovvv``.  ``ovvv`` is expanded with
            ``lib.unpack_tril`` before use.

    Returns:
        Tuple ``(t1new, t2new)`` with the same shapes as ``t1`` and ``t2``.
    """
    # time.clock() was removed in Python 3.8.  Keep using it wherever it
    # still exists, so behaviour on older interpreters is unchanged, and fall
    # back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fock = eris.fock

    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    t2new_tril = numpy.zeros((nocc * (nocc + 1) // 2, nvir, nvir))
    cc.add_wvvVV_(t1, t2, eris, t2new_tril)
    time1 = log.timer_debug1('vvvv', *time0)
    # Scatter the packed result into the i >= j part of t2new.
    ij = 0
    for i in range(nocc):
        for j in range(i + 1):
            t2new[i, j] = t2new_tril[ij]
            ij += 1
        # Halved because the final symmetrization doubles the i == j block.
        t2new[i, i] *= .5
    t2new_tril = None

    #** make_inter_F
    fov = fock[:nocc, nocc:].copy()
    t1new += fov

    # Occupied-occupied block with its diagonal removed; the diagonal enters
    # later through the energy denominators.
    foo = fock[:nocc, :nocc].copy()
    foo[range(nocc), range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc, nocc:], t1)

    # Virtual-virtual block, again without its diagonal.
    fvv = fock[nocc:, nocc:].copy()
    fvv[range(nvir), range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc, nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2 * t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape((nocc, ) * 4)
    woooo = lib.transpose_sum(woooo.reshape(nocc * nocc, -1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2, -1)
    woooo = _cp(woooo.reshape(nocc, nocc, nocc, nocc).transpose(0, 2, 1, 3))
    time1 = log.timer_debug1('woooo', *time0)

    # Expand ovvv to the full (nocc, nvir, nvir, nvir) array.
    eris_ovvv = _cp(eris.ovvv)
    eris_ovvv = lib.unpack_tril(eris_ovvv.reshape(nov, -1))
    eris_ovvv = eris_ovvv.reshape(nocc, nvir, nvir, nvir)

    fvv += numpy.einsum('kc,kcba->ab', 2 * t1, eris_ovvv)
    fvv += numpy.einsum('kc,kbca->ab', -t1, eris_ovvv)

    #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov)
    #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
    woVoV = lib.dot(_cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)), t1)
    woVoV = woVoV.reshape(nocc, nocc, nvir, nvir)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kcdb->kijb', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,kijb->jiba', -t1, tmp)
    tau = make_tau(t2, t1, t1)
    tmp = numpy.empty((nocc, nocc, nocc, nvir))
    for k in range(nocc):
        tmp[k] = lib.dot(tau.reshape(-1, nvir**2), eris_ovvv[k].reshape(
            -1, nvir)).reshape(nocc, nocc, nvir).transpose(1, 0, 2)
        lib.dot(t1, eris_ovvv[k].reshape(nvir, -1), -1,
                woVoV[k].reshape(nocc, -1), 1)
    lib.dot(tmp.reshape(nocc, -1).T, t1, -1, t2new.reshape(-1, nvir), 1)
    tmp = None

    #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
    #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo, t1)
    # Recycle the tau buffer as storage for wOVov; the first dot overwrites it.
    wOVov, tau = tau, None
    lib.dot(_cp(_cp(eris.ooov).transpose(0, 2, 3, 1).reshape(-1, nocc)), t1,
            -1, wOVov.reshape(-1, nvir))
    #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
    lib.dot(t1, eris_ovvv.reshape(-1, nvir).T, 1, wOVov.reshape(nocc, -1), 1)
    for i in range(nocc):
        t2new[i] += wOVov[i].transpose(0, 2, 1)


#: theta = t2.transpose(0,1,3,2) * 2 - t2
#: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
    theta = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        theta[i] = t2[i].transpose(0, 2, 1) * 2
        theta[i] -= t2[i]
        lib.dot(_cp(theta[i].transpose(0, 2, 1).reshape(nocc, -1)),
                eris_ovvv[i].reshape(-1, nvir), 1, t1new, 1)
    eris_ovvv = None

    eris_ovov = _cp(eris.ovov)

    for i in range(nocc):
        t2new[i] += eris_ovov[i].transpose(1, 0, 2) * .5

    fov += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
    fov -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
    #: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
    t1new += numpy.einsum('jb,jiab->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov, theta)
    lib.dot(_cp(eris_ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)),
            theta.reshape(-1, nvir), -1, t1new, 1)
    eris_ooov = None

    #: wOVov += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
    wOVov = _cp(wOVov.transpose(0, 3, 1, 2))
    eris_OVov = lib.transpose(eris_ovov.reshape(-1, nov)).reshape(
        nocc, nvir, -1, nvir)
    eris_OvoV = _cp(eris_OVov.transpose(0, 3, 2, 1))
    wOVov += eris_OVov
    t2iajb = t2.transpose(0, 2, 1, 3).copy()
    #: wOVov[j0:j1] -= .5 * numpy.einsum('iakc,jkbc->jbai', eris_ovov, t2)
    lib.dot(t2iajb.reshape(-1, nov), eris_OvoV.reshape(nov, -1), -.5,
            wOVov.reshape(nov, -1), 1)
    # The t2iajb buffer is turned into tau in place.
    tau, t2iajb = t2iajb, None
    for i in range(nocc):
        tau[i] = tau[i] * 2 - t2[i].transpose(2, 0, 1)
        tau[i] -= numpy.einsum('a,jb->bja', t1[i] * 2, t1)
    #: wOVov += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
    lib.dot(tau.reshape(-1, nov), eris_OVov.reshape(nov, -1), .5,
            wOVov.reshape(nov, -1), 1)

    #theta = t2 * 2 - t2.transpose(0,1,3,2)
    #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
    # tau is no longer needed; its storage receives the product below.
    tmp, tau = tau, None
    theta = _cp(theta.transpose(0, 3, 1, 2).reshape(nov, -1))
    lib.dot(wOVov.reshape(nov, -1), theta.T, 1, tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 0, 2)
    tmp = wOVov = eris_OvoV = eris_OVov = None

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
    for i in range(nocc):
        tau = numpy.einsum('a,jb->jab', t1[i] * .5, t1) + t2[i]
        theta = tau.transpose(0, 2, 1) * 2 - tau
        lib.dot(
            _cp(eris_ovov[i].transpose(1, 2, 0)).reshape(nocc, -1),
            theta.reshape(nocc, -1).T, 1, foo, 1)
        lib.dot(
            theta.reshape(-1, nvir).T, eris_ovov[i].reshape(nvir, -1).T, -1,
            fvv, 1)
    tau = theta = None

    eris_oovv = _cp(eris.oovv)
    #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
    #:t2new += numpy.einsum('ka,jibk->jiab', -t1, tmp)
    #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
    #:t2new += numpy.einsum('ka,jibk->jiba', -t1, tmp)
    for j in range(nocc):
        tmp = lib.dot(t1, eris_oovv[j].reshape(-1, nvir).T)
        tmp = _cp(tmp.reshape(nocc, nocc, nvir).transpose(0, 2, 1))
        t2new[j] += lib.dot(tmp.reshape(-1, nocc), t1,
                            -1).reshape(nocc, nvir, nvir).transpose(0, 2, 1)
        lib.dot(t1, eris_ovov[j].reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1))
        lib.dot(tmp.reshape(-1, nocc), t1, -1, t2new[j].reshape(-1, nvir), 1)
    tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
    t1new += numpy.einsum('jb,iajb->ia', 2 * t1, eris_ovov)
    t1new += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
    woVoV -= eris_oovv
    woVoV = woVoV.transpose(1, 3, 0, 2).copy()
    eris_oVOv = _cp(eris_ovov.transpose(0, 3, 2, 1))
    eris_oOvV = _cp(eris_ovov.transpose(0, 2, 1, 3))

    tau = make_tau(t2, t1, t1)
    #: woooo += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
    lib.dot(eris_oOvV.reshape(-1, nvir**2),
            tau.reshape(-1, nvir**2).T, 1, woooo.reshape(nocc**2, -1), 1)
    #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo, tau)
    lib.dot(
        woooo.reshape(-1, nocc * nocc).T, tau.reshape(-1, nvir * nvir), .5,
        t2new.reshape(nocc * nocc, -1), 1)
    for i in range(nocc):
        tau[i] -= t2[i] * .5
    #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
    tau = _cp(tau.transpose(0, 3, 1, 2))
    lib.dot(tau.reshape(-1, nov),
            eris_oVOv.reshape(-1, nov).T, 1, woVoV.reshape(nov, -1), 1)
    # Drop the references to the integral copies that are no longer needed.
    eris_oovv = eris_ovov = eris_oOvV = None

    # Reuse tau as the output buffer and eris_oVOv as storage for the
    # reordered t2.
    tmp, tau = tau, None
    t2ibja, eris_oVOv = eris_oVOv, None
    for i in range(nocc):
        t2ibja[i] = t2[i].transpose(2, 0, 1)
    #: t2new += numpy.einsum('ibkc,kcja->ijab', woVoV, t2ibja)
    lib.dot(woVoV.reshape(nov, -1), t2ibja.reshape(-1, nov), 1,
            tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 2, 0)

    #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
    t2iajb = t2ibja
    for i in range(nocc):
        t2iajb[i] = t2[i].transpose(1, 0, 2)
    lib.dot(woVoV.reshape(nov, -1), t2iajb.reshape(-1, nov), 1,
            tmp.reshape(nov, -1))
    for i in range(nocc):
        t2new[i] += tmp[i].transpose(1, 0, 2)
    t2ibja = t2iajb = woVoV = tmp = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5 * t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5 * t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1, nvir), ft_ab.T, 1, t2new.reshape(-1, nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc, -1), -1, t2new.reshape(nocc, -1), 1)

    # Energy denominators eia[i, a] = e_i - e_a from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Symmetrize and divide by the denominators one row i at a time: the
    # j < i blocks are completed first and mirrored, then the diagonal block.
    for i in range(nocc):
        if i > 0:
            t2new[i, :i] += t2new[:i, i].transpose(0, 2, 1)
            t2new[i, :i] /= lib.direct_sum('a,jb->jab', eia[i], eia[:i])
            t2new[:i, i] = t2new[i, :i].transpose(0, 2, 1)
        t2new[i, i] = t2new[i, i] + t2new[i, i].T
        t2new[i, i] /= lib.direct_sum('a,b->ab', eia[i], eia[i])

    time0 = log.timer_debug1('update t1 t2', *time0)
    #if hasattr(pyscf, 'MKL_NUM_THREADS'):
    #    pyscf._libmkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(1)))
    return t1new, t2new
Esempio n. 41
0
def update_amps(cc, t1, t2, eris, blksize=1):
    """Compute one update of the singles (t1) and doubles (t2) amplitudes.

    The occupied index is processed in blocks of ``blksize``; for every
    block the required integral slices are read from ``eris`` and
    contracted with ``t1``/``t2`` into ``t1new``/``t2new``.  At the end the
    accumulated residuals are divided by orbital-energy denominators built
    from ``fock.diagonal()``.

    Args:
        cc: Solver object.  This function reads ``cc.stdout``,
            ``cc.verbose``, ``cc.nocc``, ``cc.nmo`` and calls
            ``cc.add_wvvVV_``.
        t1: Singles amplitudes, indexed ``t1[i, a]`` (occupied, virtual).
        t2: Doubles amplitudes, indexed ``t2[i, j, a, b]``.
        eris: Integral container.  This function reads ``eris.fock``,
            ``eris.oooo``, ``eris.ooov``, ``eris.ovoo``, ``eris.ovov``,
            ``eris.oovv`` and ``eris.ovvv`` (the latter is expanded with
            ``unpack_tril``).
        blksize: Number of occupied indices handled per block in the
            ``prange`` loops.

    Returns:
        Tuple ``(t1new, t2new)``.  ``t2new`` is a freshly allocated array of
        shape ``(nocc, nocc, nvir, nvir)``.
    """
    # NOTE(review): time.clock() was removed in Python 3.8; this line raises
    # AttributeError on newer interpreters.
    time1 = time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc = cc.nocc
    nmo = cc.nmo
    nvir = nmo - nocc
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

#** make_inter_F
    fov = fock[:nocc,nocc:].copy()

    # The diagonal of the occupied/virtual Fock blocks is zeroed here; the
    # diagonal enters later through the eia denominators.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = numpy.asarray(eris.ooov)
    woooo = lib.dot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo = woooo.reshape(nocc,nocc,nocc,nocc) + numpy.asarray(eris.oooo)
    # Stored with the two middle indices swapped, as a C-contiguous array.
    woooo = numpy.asarray(woooo.transpose(0,2,1,3), order='C')
    time1 = log.timer_debug1('woooo', *time0)

    # Main loop over blocks [p0:p1) of the occupied index.
    # NOTE(review): lib.dot(a, b, alpha, c, beta) is assumed to accumulate
    # alpha*a.b + beta*c into c in place - confirm against lib.numpy_helper.
    for p0, p1 in prange(0, nocc, blksize):
# ==== read eris.ovvv ====
        eris_ovvv = numpy.asarray(eris.ovvv[p0:p1])
        eris_ovvv = unpack_tril(eris_ovvv.reshape((p1-p0)*nvir,-1))
        eris_ovvv = eris_ovvv.reshape(p1-p0,nvir,nvir,nvir)
        eris_ooov = numpy.asarray(eris.ooov[p0:p1])

        fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab',  -t1[p0:p1], eris_ovvv)

        # NOTE(review): eris.ooov[p0:p1] is re-read here although the same
        # slice is already held in eris_ooov.
        foo[:,p0:p1] += numpy.einsum('kc,jikc->ij', 2*t1, eris.ooov[p0:p1])
        foo[:,p0:p1] += numpy.einsum('kc,jkic->ij',  -t1, eris.ooov[p0:p1])

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
    #: t2new += numpy.einsum('ka,ijbk->jiba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = eris_ovvv.transpose(1,2,0,3).reshape(nvir*nvir,-1)
        # tmp is left uninitialised: every slice is written by the
        # lib.dot call below, whose last argument is 0.
        tmp = numpy.empty((nocc,nocc,p1-p0,nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1)
            #: tmp[j0:j1] += numpy.einsum('ijcd,cdkb->ijkb', tau, eris_vvov)
            lib.dot(tau.reshape(-1,nvir*nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
        #: t2new += numpy.einsum('ka,ijkb->jiba', -t1[p0:p1], tmp)
        tmp = numpy.asarray(tmp.transpose(1,0,3,2).reshape(-1,p1-p0), order='C')
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        # Drop references so the large temporaries can be freed.
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

    #: wovvo += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wovvo -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
    #: t2new += woVoV.transpose()
        #: wovvo = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = numpy.asarray(eris.ovoo[p0:p1].transpose(2,0,1,3), order='C')
        wovvo = lib.dot(tmp.reshape(-1,nocc), t1, -1)
        wovvo = wovvo.reshape(nocc,p1-p0,nvir,nvir)
        #: wovvo += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1, eris_ovvv.reshape(-1,nvir).T, 1, wovvo.reshape(nocc,-1), 1)
        t2new[p0:p1] += wovvo.transpose(1,0,2,3)

        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(numpy.asarray(eris_ooov.transpose(0,1,3,2),
                                      order='C').reshape(-1,nocc), t1)
        woVoV = woVoV.reshape(p1-p0,nocc,nvir,nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir,-1), -1,
                    woVoV[i].reshape(nocc,-1), 1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        theta = make_theta(t2[p0:p1])
        #: t1new += numpy.einsum('jibc,jcba->ia', theta, eris_ovvv)
        lib.dot(theta.transpose(1,0,3,2).reshape(nocc,-1),
                eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

# ==== read eris.oOVv ====
        # eris.ovov is re-ordered to (o, O, V, v) index order on read.
        eris_oOVv = numpy.asarray(eris.ovov[p0:p1].transpose(0,2,3,1), order='C')
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1-p0):
            t2new[p0+i] += eris_oOVv[i].transpose(0,2,1) * .5

        fov[p0:p1] += numpy.einsum('kc,ikca->ia', t1, eris_oOVv) * 2
        fov[p0:p1] -= numpy.einsum('kc,ikac->ia', t1, eris_oOVv)

    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: t1new += numpy.einsum('jb,ijab->ia', fov, theta)
    #: t1new -= numpy.einsum('ikjb,kjab->ia', eris.ooov, theta)
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(eris_ooov.transpose(1,0,2,3).reshape(nocc,-1),
                theta.reshape(-1,nvir), -1, t1new, 1)
        eris_ooov = None

    #: wovvo += eris.ovov.transpose(0,1,3,2)
    #: theta = t2.transpose(1,0,2,3) * 2 - t2
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wovvo += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wovvo -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wovvo)
        theta = numpy.asarray(theta.transpose(1,2,3,0).reshape(nov,-1), order='C')
        wovvo = wovvo.transpose(0,3,2,1) + eris_oOVv.transpose(1,2,3,0)
        wovvo = numpy.asarray(wovvo, order='C')
        eris_OVvo = eris_oOVv.transpose(1,2,3,0).reshape(nov,-1)
        eris_OvVo = eris_oOVv.transpose(1,3,2,0).reshape(nov,-1)
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = numpy.asarray(t2[j0:j1].transpose(0,2,1,3), order='C')
            #: wovvo[j0:j1] -= .5 * numpy.einsum('icka,jkbc->jbai', eris_oOVv, t2)
            lib.dot(t2iajb.reshape(-1,nov), eris_OvVo,
                    -.5, wovvo[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            # tau aliases t2iajb: the buffer is overwritten in place below.
            tau = t2iajb
            for i in range(j1-j0):
                tau[i] *= 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wovvo[j0:j1] += .5 * numpy.einsum('ikca,jbkc->jbai', eris_oOVv, tau)
            lib.dot(tau.reshape(-1,nov), eris_OVvo,
                    .5, wovvo[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wovvo[j0:j1])
            tmp = lib.dot(wovvo[j0:j1].reshape((j1-j0)*nvir,-1), theta.T)
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,1,3)
            tau = tmp = None
            #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wovvo = eris_OvVo = eris_OVvo = None
        time2 = log.timer_debug1('wovvo [%d:%d]'%(p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, .5)
        theta = make_theta(tau)
        #: foo += numpy.einsum('kiab,kjab->ij', eris_oOVv, theta)
        #: fvv -= numpy.einsum('ijca,ijcb->ab', theta, eris_oOVv)
        for i in range(eris_oOVv.shape[0]):
            lib.dot(eris_oOVv[i].reshape(nocc,-1),
                    theta[i].reshape(nocc,-1).T, 1, foo, 1)
        lib.dot(theta.reshape(-1,nvir).T, eris_oOVv.reshape(-1,nvir),
                -1, fvv, 1)
        tau = theta = None

# ==== read eris.oovv ====
        eris_oovv = numpy.asarray(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #: tmp  = numpy.einsum('ic,kjbc->kjib', t1, eris_oovv)
        #: tmp += numpy.einsum('ic,kjbc->kijb', t1, eris_oOVv)
        tmp = lib.dot(eris_oovv.reshape(-1,nvir), t1.T).reshape(-1,nocc,nvir,nocc)
        tmp = numpy.asarray(tmp.transpose(0,3,2,1), order='C')
        lib.dot(eris_oOVv.reshape(-1,nvir), t1.T, 1, tmp.reshape(-1,nocc), 1)
        tmp = numpy.asarray(tmp.transpose(1,3,2,0), order='C')
        #: t2new += numpy.einsum('ka,jibk->ijba', -t1[p0:p1], tmp)
        lib.dot(tmp.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
        tmp = None

    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', 2*t1, eris_oOVv)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: woVoV -= eris.oovv
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
    #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1,3,0,2).copy()
        eris_oVOv = eris_oOVv.transpose(0,2,1,3).reshape(-1,nov)
        eris_oOvV = eris_oOVv.transpose(0,1,3,2).reshape(-1,nvir**2)
        #==== mem usage blksize*(nocc*nvir**2*4)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # NOTE(review): the trailing _dgemm arguments are presumably
            # alpha, beta and the a/b/c offsets, with j0*nocc selecting the
            # target column block - confirm against lib.numpy_helper._dgemm.
            lib.numpy_helper._dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                                    eris_oOvV.reshape(-1,nvir*nvir),
                                    tau.reshape(-1,nvir*nvir),
                                    woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1,
                                    0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ikbc->jiab', tau, eris_oOVv)
            tau = tau.transpose(0,3,1,2).reshape(-1,nov)
            lib.dot(tau, eris_oVOv.T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
            #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1,nocc*nocc).T,
                tau.reshape(-1,nvir*nvir), .5,
                t2new.reshape(nocc*nocc,-1), 1)
        eris_oovv = eris_oOVv = eris_oVOv = eris_oOvV = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = numpy.asarray(t2[p0:p1].transpose(0,2,1,3), order='C')
        t2ibja = numpy.asarray(t2[p0:p1].transpose(0,3,1,2), order='C')
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            tmp = lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                          t2ibja.reshape(-1,nov))
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,3,1)
            tmp = None

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            tmp = lib.dot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                          t2iajb.reshape(-1,nov))
            t2new[j0:j1] += tmp.reshape(j1-j0,nvir,nocc,nvir).transpose(0,2,1,3)
            tmp = None
        t2ibja = t2iajb = woVoV = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
# ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the pairs j <= i are kept: entry ij of t2new_tril holds
    # t2new[i,j] + t2new[j,i].T, packed row by row.
    t2new_tril = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new_tril[ij]  = t2new[i,j]
            t2new_tril[ij] += t2new[j,i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    # Presumably adds the vvvv contribution into t2new_tril in place (the
    # return value is ignored) - confirm against cc.add_wvvVV_.
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, blksize)
    time1 = log.timer_debug1('vvvv', *time1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    # Divide each packed row i (pairs (i, j) with j <= i) by
    # eia[i][a] + eia[j][b].
    p0 = 0
    for i in range(nocc):
        dajb = (eia[i].reshape(-1,1) + eia[:i+1].reshape(1,-1))
        t2new_tril[p0:p0+i+1] /= dajb.reshape(nvir,i+1,nvir).transpose(1,0,2)
        p0 += i+1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular storage: t2new[j,i] is the transpose of
    # t2new[i,j] in the virtual indices.
    t2new = numpy.empty((nocc,nocc,nvir,nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i,j] = t2new_tril[ij]
            t2new[j,i] = t2new_tril[ij].T
            ij += 1
        t2new[i,i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

#** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new /= eia
#** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
Esempio n. 42
0
def update_amps(cc, t1, t2, eris, max_memory=2000):
    """Compute one update of the singles (t1) and doubles (t2) amplitudes.

    The occupied index is processed in blocks whose size is derived from
    the memory budget; for every block the required integral slices are
    read from ``eris`` and contracted with ``t1``/``t2`` into
    ``t1new``/``t2new``.  At the end the accumulated residuals are divided
    by orbital-energy denominators built from ``fock.diagonal()``.

    Args:
        cc: Solver object.  This function reads ``cc.stdout`` and
            ``cc.verbose`` and calls ``cc.add_wvvVV_``.
        t1: Singles amplitudes of shape ``(nocc, nvir)``.
        t2: Doubles amplitudes, indexed ``t2[i, j, a, b]``.
        eris: Integral container.  This function reads ``eris.fock``,
            ``eris.oooo``, ``eris.ooov``, ``eris.ovoo``, ``eris.ovov``,
            ``eris.oovv`` and ``eris.ovvv`` (the latter is expanded with
            ``_ccsd.unpack_tril``).
        max_memory: Memory budget (presumably in MB - confirm with
            ``lib.current_memory``).  The current usage reported by
            ``lib.current_memory()[0]`` is subtracted before the block size
            is derived, and the reduced value is what is forwarded to
            ``cc.add_wvvVV_``.

    Returns:
        Tuple ``(t1new, t2new)``.  ``t2new`` is a freshly allocated array of
        shape ``(nocc, nocc, nvir, nvir)``.
    """
    # NOTE(review): time.clock() was removed in Python 3.8; this line raises
    # AttributeError on newer interpreters.
    time0 = time.clock(), time.time()
    log = logger.Logger(cc.stdout, cc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)

    #** make_inter_F
    fov = fock[:nocc, nocc:].copy()

    # The diagonal of the occupied/virtual Fock blocks is zeroed here; the
    # diagonal enters later through the eia denominators.
    foo = fock[:nocc, :nocc].copy()
    foo[range(nocc), range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc, nocc:], t1)

    fvv = fock[nocc:, nocc:].copy()
    fvv[range(nvir), range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc, nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2 * t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.dot(eris_ooov.reshape(-1, nvir), t1.T).reshape((nocc, ) * 4)
    woooo = lib.transpose_sum(woooo.reshape(nocc * nocc, -1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2, -1)
    # Stored with the two middle indices swapped.
    woooo = _cp(woooo.reshape(nocc, nocc, nocc, nocc).transpose(0, 2, 1, 3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time0)

    # Block size from the remaining memory budget, never below BLKMIN.
    unit = _memory_usage_inloop(nocc, nvir) * 1e6 / 8
    max_memory = max_memory - lib.current_memory()[0]
    blksize = max(BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1('block size = %d, nocc = %d is divided into %d blocks', blksize,
               nocc, int((nocc + blksize - 1) // blksize))

    # Main loop over blocks [p0:p1) of the occupied index.
    # NOTE(review): lib.dot(a, b, alpha, c, beta) is assumed to accumulate
    # alpha*a.b + beta*c into c in place - confirm against lib.numpy_helper.
    for p0, p1 in prange(0, nocc, blksize):
        # ==== read eris.ovvv ====
        eris_ovvv = _cp(eris.ovvv[p0:p1])
        eris_ovvv = _ccsd.unpack_tril(eris_ovvv.reshape((p1 - p0) * nvir, -1))
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)

        fvv += numpy.einsum('kc,kcba->ab', 2 * t1[p0:p1], eris_ovvv)
        fvv += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: tmp = numpy.einsum('ijcd,kdcb->ijbk', tau, eris.ovvv)
        #: t2new += numpy.einsum('ka,ijbk->ijba', -t1, tmp)
        #: eris_vvov = eris_ovvv.transpose(1,2,0,3).copy()
        eris_vvov = _cp(
            eris_ovvv.transpose(2, 1, 0, 3).reshape(nvir * nvir, -1))
        # tmp is left uninitialised: every slice is written by the
        # lib.dot call below, whose last argument is 0.
        tmp = numpy.empty((nocc, nocc, p1 - p0, nvir))
        # Scratch buffer handed to make_tau via out= for each j-block.
        taubuf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1 - j0])
            lib.dot(tau.reshape(-1, nvir * nvir), eris_vvov, 1,
                    tmp[j0:j1].reshape((j1 - j0) * nocc, -1), 0)
        tmp = _cp(tmp.transpose(0, 1, 3, 2).reshape(-1, p1 - p0))
        lib.dot(tmp, t1[p0:p1], -1, t2new.reshape(-1, nvir), 1)
        # Drop references so the large temporaries can be freed.
        tau = tmp = eris_vvov = None
        #==== mem usage blksize*(nvir**3*2+nvir*nocc**2*2)

        #: wOVov += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOVov -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        #: t2new += woVoV.transpose()
        #: wOVov = -numpy.einsum('jbik,ka->ijba', eris.ovoo[p0:p1], t1)
        tmp = _cp(eris.ovoo[p0:p1].transpose(2, 0, 1, 3))
        wOVov = lib.dot(tmp.reshape(-1, nocc), t1, -1)
        tmp = None
        wOVov = wOVov.reshape(nocc, p1 - p0, nvir, nvir)
        #: wOVov += numpy.einsum('iabc,jc->jiab', eris_ovvv, t1)
        lib.dot(t1,
                eris_ovvv.reshape(-1, nvir).T, 1, wOVov.reshape(nocc, -1), 1)
        t2new[p0:p1] += wOVov.transpose(1, 0, 2, 3)

        eris_ooov = _cp(eris.ooov[p0:p1])
        #: woVoV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        #: woVoV -= numpy.einsum('jc,icab->ijab', t1, eris_ovvv)
        woVoV = lib.dot(_cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)),
                        t1)
        woVoV = woVoV.reshape(p1 - p0, nocc, nvir, nvir)
        for i in range(eris_ovvv.shape[0]):
            lib.dot(t1, eris_ovvv[i].reshape(nvir, -1), -1,
                    woVoV[i].reshape(nocc, -1), 1)

    #: theta = t2.transpose(0,1,3,2) * 2 - t2
    #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
        # theta is built slice by slice and each slice is contracted with
        # eris_ovvv immediately; the full theta is reused further below.
        theta = numpy.empty((p1 - p0, nocc, nvir, nvir))
        for i in range(p1 - p0):
            theta[i] = t2[p0 + i].transpose(0, 2, 1) * 2
            theta[i] -= t2[p0 + i]
            lib.dot(_cp(theta[i].transpose(0, 2, 1).reshape(nocc, -1)),
                    eris_ovvv[i].reshape(-1, nvir), 1, t1new, 1)
        eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]' % (p0, p1), *time1)
        #==== mem usage blksize*(nvir**3+nocc*nvir**2*4)

        # ==== read eris.ovov ====
        eris_ovov = _cp(eris.ovov[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*4)

        for i in range(p1 - p0):
            t2new[p0 + i] += eris_ovov[i].transpose(1, 0, 2) * .5

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        t1new += numpy.einsum('jb,jiab->ia', fov[p0:p1], theta)
        #: t1new -= numpy.einsum('kijb,kjab->ia', eris.ooov[p0:p1], theta)
        lib.dot(_cp(eris_ooov.transpose(1, 0, 2, 3).reshape(nocc, -1)),
                theta.reshape(-1, nvir), -1, t1new, 1)
        eris_ooov = None

        #: wOVov += eris.ovov.transpose(0,1,3,2)
        #: theta = t2.transpose(1,0,2,3) * 2 - t2
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        theta = _cp(theta.transpose(0, 3, 1, 2))
        wOVov = _cp(wOVov.transpose(0, 3, 1, 2))
        eris_OVov = lib.transpose(eris_ovov.reshape(-1, nov)).reshape(
            nocc, nvir, -1, nvir)
        eris_OvoV = _cp(eris_OVov.transpose(0, 3, 2, 1))
        wOVov += eris_OVov
        for j0, j1 in prange(0, nocc, blksize):
            t2iajb = t2[j0:j1].transpose(0, 2, 1, 3).copy()
            #: wOVov[j0:j1] -= .5 * numpy.einsum('iack,jkbc->jbai', eris_ovov, t2)
            lib.dot(t2iajb.reshape(-1, nov), eris_OvoV.reshape(nov, -1), -.5,
                    wOVov[j0:j1].reshape((j1 - j0) * nvir, -1), 1)
            # The t2iajb buffer is handed over to tau and overwritten in
            # place below.
            tau, t2iajb = t2iajb, None
            for i in range(j1 - j0):
                tau[i] *= 2
                tau[i] -= t2[j0 + i].transpose(2, 0, 1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0 + i] * 2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.dot(tau.reshape(-1, nov), eris_OVov.reshape(nov, -1), .5,
                    wOVov[j0:j1].reshape((j1 - j0) * nvir, -1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The same buffer is reused once more as the output of lib.dot.
            tmp, tau = tau, None
            lib.dot(wOVov[j0:j1].reshape((j1 - j0) * nvir, -1),
                    theta.reshape(-1, nov), 1, tmp.reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 0, 2)
            tmp = None
            #==== mem usage blksize*(nocc*nvir**2*8)
        theta = wOVov = eris_OvoV = eris_OVov = None
        time2 = log.timer_debug1('wOVov [%d:%d]' % (p0, p1), *time2)
        #==== mem usage blksize*(nocc*nvir**2*2)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        for i in range(p1 - p0):
            # Per-slice tau = t2[i] + .5 * t1[i] (x) t1, then
            # theta = 2 * tau^T - tau (transpose over the virtual indices).
            tau = numpy.einsum('a,jb->jab', t1[p0 + i] * .5, t1)
            tau += t2[p0 + i]
            theta = tau.transpose(0, 2, 1) * 2
            theta -= tau
            lib.dot(
                _cp(eris_ovov[i].transpose(1, 2, 0)).reshape(nocc, -1),
                theta.reshape(nocc, -1).T, 1, foo, 1)
            lib.dot(
                theta.reshape(-1, nvir).T, eris_ovov[i].reshape(nvir, -1).T,
                -1, fvv, 1)
        tau = theta = None

        # ==== read eris.oovv ====
        eris_oovv = _cp(eris.oovv[p0:p1])
        #==== mem usage blksize*(nocc*nvir**2*3)

        #:tmp = numpy.einsum('ic,jkbc->jibk', t1, eris_oovv)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiab', -t1, tmp)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1 - p0):
            tmp = lib.dot(t1, eris_oovv[j].reshape(-1, nvir).T)
            tmp = _cp(tmp.reshape(nocc, nocc, nvir).transpose(0, 2, 1))
            t2new[p0 + j] += lib.dot(tmp.reshape(-1, nocc), t1,
                                     -1).reshape(nocc, nvir,
                                                 nvir).transpose(0, 2, 1)
            # tmp is reused as the output buffer for the eris_ovov term.
            lib.dot(t1, eris_ovov[j].reshape(-1, nvir).T, 1,
                    tmp.reshape(nocc, -1))
            lib.dot(tmp.reshape(-1, nocc), t1, -1,
                    t2new[p0 + j].reshape(-1, nvir), 1)
        tmp = None

        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2 * t1, eris_ovov)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: woVoV -= eris.oovv
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('ka,ijkb->ijab', t1, eris.ooov)
        #: woVoV += numpy.einsum('jkca,ikbc->ijab', tau, eris.oOVv)
        woVoV -= eris_oovv
        woVoV = woVoV.transpose(1, 3, 0, 2).copy()
        eris_oVOv = _cp(eris_ovov.transpose(0, 3, 2, 1))
        eris_oOvV = _cp(eris_ovov.transpose(0, 2, 1, 3))
        #==== mem usage blksize*(nocc*nvir**2*4)

        taubuf = numpy.empty((blksize, nocc, nvir, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=taubuf[:j1 - j0])
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            # NOTE(review): the trailing _dgemm arguments are presumably
            # alpha, beta and the a/b/c offsets, with j0*nocc selecting the
            # target column block - confirm against lib.numpy_helper._dgemm.
            lib.numpy_helper._dgemm('N', 'T', (p1 - p0) * nocc,
                                    (j1 - j0) * nocc, nvir * nvir,
                                    eris_oOvV.reshape(-1, nvir * nvir),
                                    tau.reshape(-1, nvir * nvir),
                                    woooo[p0:p1].reshape(-1, nocc * nocc), 1,
                                    1, 0, 0, j0 * nocc)
            for i in range(j1 - j0):
                tau[i] -= t2[j0 + i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.dot(_cp(tau.transpose(0, 3, 1, 2).reshape(-1, nov)),
                    eris_oVOv.reshape(-1, nov).T, 1, woVoV[j0:j1].reshape(
                        (j1 - j0) * nvir, -1), 1)
            #==== mem usage blksize*(nocc*nvir**2*6)
        time2 = log.timer_debug1('woVoV [%d:%d]' % (p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=taubuf[:p1 - p0])
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.dot(woooo[p0:p1].reshape(-1, nocc * nocc).T,
                tau.reshape(-1, nvir * nvir), .5,
                t2new.reshape(nocc * nocc, -1), 1)
        eris_oovv = eris_ovov = eris_oVOv = eris_oOvV = taubuf = tau = None
        #==== mem usage blksize*(nocc*nvir**2*1)

        t2iajb = _cp(t2[p0:p1].transpose(0, 2, 1, 3))
        t2ibja = _cp(t2[p0:p1].transpose(0, 3, 1, 2))
        # One scratch buffer shared by both contractions of every j-block.
        tmp = numpy.empty((blksize, nvir, nocc, nvir))
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.dot(woVoV[j0:j1].reshape((j1 - j0) * nvir, -1),
                    t2ibja.reshape(-1, nov), 1, tmp[:j1 - j0].reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 2, 0)

            #: t2new[j0:j1] += numpy.einsum('iakc,kcjb->ijab', woVoV[j0:j1], t2iajb)
            lib.dot(woVoV[j0:j1].reshape((j1 - j0) * nvir, -1),
                    t2iajb.reshape(-1, nov), 1, tmp[:j1 - j0].reshape(-1, nov))
            for i in range(j1 - j0):
                t2new[j0 + i] += tmp[i].transpose(1, 0, 2)
        t2ibja = t2iajb = woVoV = tmp = None
        #==== mem usage blksize*(nocc*nvir**2*3)
        time1 = log.timer_debug1('contract occ [%d:%d]' % (p0, p1), *time1)


# ==================
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5 * t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5 * t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.dot(t2.reshape(-1, nvir), ft_ab.T, 1, t2new.reshape(-1, nvir), 1)
    lib.dot(ft_ij.T, t2.reshape(nocc, -1), -1, t2new.reshape(nocc, -1), 1)

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Only the pairs j <= i are kept: entry ij of t2new_tril holds
    # t2new[i,j] + t2new[j,i].T, packed row by row.
    t2new_tril = numpy.empty((nocc * (nocc + 1) // 2, nvir, nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i + 1):
            t2new_tril[ij] = t2new[i, j]
            t2new_tril[ij] += t2new[j, i].T
            ij += 1
    t2new = None
    time1 = log.timer_debug1('t2 tril', *time1)
    # Presumably adds the vvvv contribution into t2new_tril in place (the
    # return value is ignored) - confirm against cc.add_wvvVV_.
    cc.add_wvvVV_(t1, t2, eris, t2new_tril, max_memory)
    time1 = log.timer_debug1('vvvv', *time1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    # Divide each packed row i (pairs (i, j) with j <= i) by the
    # denominator array built by lib.direct_sum from eia[i] and eia[:i+1].
    p0 = 0
    for i in range(nocc):
        t2new_tril[p0:p0 + i + 1] /= lib.direct_sum('a,jb->jab', eia[i],
                                                    eia[:i + 1])
        p0 += i + 1
    time1 = log.timer_debug1('g2/dijab', *time1)

    # Unpack the triangular storage: t2new[j,i] is the transpose of
    # t2new[i,j] in the virtual indices.
    t2new = numpy.empty((nocc, nocc, nvir, nvir))
    ij = 0
    for i in range(nocc):
        for j in range(i):
            t2new[i, j] = t2new_tril[ij]
            t2new[j, i] = t2new_tril[ij].T
            ij += 1
        t2new[i, i] = t2new_tril[ij]
        ij += 1
    t2new_tril = None

    #** update_amp_t1
    t1new += fock[:nocc,nocc:] \
           + numpy.einsum('ib,ab->ia', t1, fvv) \
           - numpy.einsum('ja,ji->ia', t1, foo)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc, None] - mo_e[None, nocc:]
    t1new /= eia
    #** end update_amp_t1
    time0 = log.timer_debug1('update t1 t2', *time0)

    return t1new, t2new
Esempio n. 43
0
def make_hdiag(h1e, eri, norb, nelec):
    """Return the flattened, symmetrised diagonal from direct_spin1.make_hdiag.

    The diagonal produced by ``direct_spin1.make_hdiag`` is viewed as a
    square ``(na, na)`` matrix, averaged with its transpose and returned
    as a 1-D array.

    Args:
        h1e, eri, norb, nelec: Forwarded unchanged to
            ``direct_spin1.make_hdiag``.

    Returns:
        1-D array holding ``0.5 * (M + M^T)`` for the square view ``M`` of
        the raw diagonal.
    """
    raw_diag = direct_spin1.make_hdiag(h1e, eri, norb, nelec)
    side = int(numpy.sqrt(raw_diag.size))
    square = raw_diag.reshape(side, side)
    # Averaging with the transpose reduces numerical error in the diagonal.
    summed = lib.transpose_sum(square, inplace=True)
    averaged = summed * .5
    return averaged.ravel()
Esempio n. 44
0
File: uccsd.py Progetto: tmash/pyscf
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    """Transform the spin-resolved 2-particle density blocks from MO to AO pairs.

    The work is done in three passes that stage intermediates in a temporary
    HDF5 swap file and write the results into the datasets ``'dm2aa+ab'``
    and ``'dm2bb+ab'`` (each of shape ``(nao_pair, nao_pair)``) of ``fsave``.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        d2: sequence of eight entries; each entry unpacks into four
            spin blocks (see the unpacking statements below).
        mo_coeff: pair ``(mo_a, mo_b)`` of 2D orbital coefficient arrays,
            each with ``nao`` rows.
        fsave: optional HDF5-like file object.  When ``None`` a temporary
            file is created and the results are read back into memory.

    Returns:
        A tuple ``(dm2aa+ab, dm2bb+ab)`` of in-memory arrays when ``fsave``
        was ``None``; otherwise the ``fsave`` object that now holds both
        datasets.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    # NOTE(review): time.clock() was removed in Python 3.8.  It is kept here
    # because the clock used inside log.timer_debug1 is not visible from this
    # block and both must agree -- confirm before modernizing.
    time1 = time.clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dovOV, dOVov, dOVOV = d2[0]
    dvvvv, dvvVV, dVVvv, dVVVV = d2[1]
    doooo, dooOO, dOOoo, dOOOO = d2[2]
    doovv, dooVV, dOOvv, dOOVV = d2[3]
    dovvo, dovVO, dOVvo, dOVVO = d2[4]
    dvvov, dvvOV, dVVov, dVVOV = d2[5]
    dovvv, dovVV, dOVvv, dOVVV = d2[6]
    dooov, dooOV, dOOov, dOOOV = d2[7]
    # The C driver below receives raw pointers to the coefficient arrays,
    # which are laid out in Fortran order here.
    mo_a = numpy.asarray(mo_coeff[0], order='F')
    mo_b = numpy.asarray(mo_coeff[1], order='F')

    nocca, nvira, noccb, nvirb = dovOV.shape
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nao_pair = nao * (nao+1) // 2
    nvira_pair = nvira * (nvira+1) //2
    nvirb_pair = nvirb * (nvirb+1) //2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()
    def _trans(vin, mo_coeff, orbs_slice, out=None):
        # Run the C driver on each of the vin.shape[0] rows of ``vin``,
        # writing one row of length nao_pair per input row into ``out``.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow,nao_pair))
        fdrv(ftrans, fmm,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(nrow), ctypes.c_int(nao),
             (ctypes.c_int*4)(*orbs_slice), pao_loc, ctypes.c_int(0))
        return out

    # ---- pass 1: transform the virtual-pair-indexed blocks ----
    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize_a = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize_a = min(nvira_pair, max(ccsd.BLKMIN, blksize_a))
    chunks_a = (int(min(nao_pair, 4e8/blksize_a)), blksize_a)
    v_aa = fswap.create_dataset('v_aa', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        v_aa[:,p0:p1] = _trans(lib.unpack_tril(dvvvv[p0:p1]*.25), mo_a,
                               (nocca,nmoa,nocca,nmoa)).T

    v_ba = fswap.create_dataset('v_ab', (nao_pair,nvira_pair), 'f8',
                                chunks=chunks_a)
    dvvOP = fswap.create_dataset('dvvOP', (nvira_pair,noccb,nmob), 'f8',
                                 chunks=(int(min(blksize_a,4e8/nmob)),1,nmob))
    for i in range(noccb):
        buf1 = numpy.empty((nmob,nvira,nvira))
        buf1[:noccb] = dOOvv[i] * .5
        buf1[noccb:] = dOVvv[i]
        # Symmetrize over the two alpha-virtual indices and keep the
        # lower triangle only.
        buf1 = buf1.transpose(1,2,0) + buf1.transpose(2,1,0)
        dvvOP[:,i] = buf1[numpy.tril_indices(nvira)]
    for p0, p1 in lib.prange(0, nvira_pair, blksize_a):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(dvvVV[p0:p1] * .5)
        buf1[:,:noccb,:] = dvvOP[p0:p1] * .5
        v_ba[:,p0:p1] = _trans(buf1, mo_b, (0,nmob,0,nmob)).T
    # NOTE(review): this drops the local reference to dvvOV and creates an
    # otherwise unused name dvvOO; possibly dvvOP was intended -- confirm.
    dvvOO = dvvOV = None

    blksize_b = int(max_memory*.9e6/8/(nao_pair+nmob**2))
    blksize_b = min(nvirb_pair, max(ccsd.BLKMIN, blksize_b))
    chunks_b = (int(min(nao_pair, 4e8/blksize_b)), blksize_b)
    v_bb = fswap.create_dataset('v_bb', (nao_pair,nvirb_pair), 'f8',
                                chunks=chunks_b)
    for p0, p1 in lib.prange(0, nvirb_pair, blksize_b):
        v_bb[:,p0:p1] = _trans(lib.unpack_tril(dVVVV[p0:p1]*.25), mo_b,
                               (noccb,nmob,noccb,nmob)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # ---- pass 2 ----
    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory*.9e6/8/(nao_pair+nmoa**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    o_aa = fswap.create_dataset('o_aa', (nmoa,nocca,nao_pair), 'f8', chunks=(nocca,nocca,blksize))
    o_ab = fswap.create_dataset('o_ab', (nmoa,nocca,nao_pair), 'f8', chunks=(nocca,nocca,blksize))
    o_bb = fswap.create_dataset('o_bb', (nmob,noccb,nao_pair), 'f8', chunks=(noccb,noccb,blksize))
    buf1 = numpy.zeros((nocca,nocca,nmoa,nmoa))
    buf1[:,:,:nocca,:nocca] = _cp(doooo) * .25
    buf1[:,:,nocca:,nocca:] = _cp(doovv) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_a, (0,nmoa,0,nmoa))
    o_aa[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((nocca,nocca,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dooOO) * .5
    buf1[:,:,:noccb,noccb:] = _cp(dooOV)
    buf1[:,:,noccb:,noccb:] = _cp(dooVV) * .5
    buf1 = _trans(buf1.reshape(nocca**2,-1), mo_b, (0,nmob,0,nmob))
    o_ab[:nocca] = buf1.reshape(nocca,nocca,nao_pair)

    buf1 = numpy.zeros((noccb,noccb,nmob,nmob))
    buf1[:,:,:noccb,:noccb] = _cp(dOOOO) * .25
    buf1[:,:,noccb:,noccb:] = _cp(dOOVV) * .5
    buf1 = _trans(buf1.reshape(noccb**2,-1), mo_b, (0,nmob,0,nmob))
    o_bb[:noccb] = buf1.reshape(noccb,noccb,nao_pair)

    dovoo = numpy.asarray(dooov).transpose(2,3,0,1)
    dovOO = numpy.asarray(dOOov).transpose(2,3,0,1)
    dOVOO = numpy.asarray(dOOOV).transpose(2,3,0,1)
    # Rows nocca..nmoa of o_aa / o_ab are filled in slabs of at most nocca.
    for p0, p1 in lib.prange(nocca, nmoa, nocca):
        buf1 = numpy.zeros((nocca,p1-p0,nmoa,nmoa))
        buf1[:,:,:nocca,:nocca] = dovoo[:,p0-nocca:p1-nocca]
        buf1[:,:,nocca:,:nocca] = dovvo[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,:nocca,nocca:] = dovov[:,p0-nocca:p1-nocca] * .5
        buf1[:,:,nocca:,nocca:] = dovvv[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        o_aa[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

        buf1 = numpy.zeros((nocca,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dovOO[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,:noccb] = dovVO[:,p0-nocca:p1-nocca]
        buf1[:,:,:noccb,noccb:] = dovOV[:,p0-nocca:p1-nocca]
        buf1[:,:,noccb:,noccb:] = dovVV[:,p0-nocca:p1-nocca]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*nocca,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_ab[p0:p1] = buf1.reshape(p1-p0,nocca,nao_pair)

    for p0, p1 in lib.prange(noccb, nmob, noccb):
        buf1 = numpy.zeros((noccb,p1-p0,nmob,nmob))
        buf1[:,:,:noccb,:noccb] = dOVOO[:,p0-noccb:p1-noccb]
        buf1[:,:,noccb:,:noccb] = dOVVO[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,:noccb,noccb:] = dOVOV[:,p0-noccb:p1-noccb] * .5
        buf1[:,:,noccb:,noccb:] = dOVVV[:,p0-noccb:p1-noccb]
        buf1 = buf1.transpose(1,0,3,2).reshape((p1-p0)*noccb,-1)
        buf1 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        o_bb[p0:p1] = buf1.reshape(p1-p0,noccb,nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # ---- pass 3 ----
    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    dm2a = fsave.create_dataset('dm2aa+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    dm2b = fsave.create_dataset('dm2bb+ab', (nao_pair,nao_pair), 'f8',
                                chunks=(int(min(nao_pair,4e8/blksize)),blksize))
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_aa[p0:p1]))
        buf1[:,:,:nocca] = o_aa[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        if p0 > 0:
            # Fold this row slab into the already-written column slab so
            # that both halves hold the transpose-summed values.
            buf1 = _cp(dm2a[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2a[:p0,p0:p1] = buf1
        # The diagonal square block is symmetrized in place.
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2a[p0:p1] = buf2
        buf1 = buf2 = None

    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmob,nmob))
        buf1[:,noccb:,noccb:] = lib.unpack_tril(_cp(v_bb[p0:p1]))
        buf1[:,:,:noccb] = o_bb[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_b, (0,nmob,0,nmob))
        if p0 > 0:
            buf1 = _cp(dm2b[:p0,p0:p1])
            buf1[:p0,:p1-p0] += buf2[:p1-p0,:p0].T
            buf2[:p1-p0,:p0] = buf1[:p0,:p1-p0].T
            dm2b[:p0,p0:p1] = buf1
        lib.transpose_sum(buf2[:,p0:p1], inplace=True)
        dm2b[p0:p1] = buf2
        buf1 = buf2 = None

    # The mixed-spin contribution is added to both output datasets:
    # transposed into dm2a and as-is into dm2b.
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1-p0,nmoa,nmoa))
        buf1[:,nocca:,nocca:] = lib.unpack_tril(_cp(v_ba[p0:p1]))
        buf1[:,:,:nocca] = o_ab[:,:,p0:p1].transpose(2,0,1)
        buf2 = _trans(buf1, mo_a, (0,nmoa,0,nmoa))
        dm2a[:,p0:p1] = dm2a[:,p0:p1] + buf2.T
        dm2b[p0:p1] = dm2b[p0:p1] + buf2
        buf1 = buf2 = None

    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # ``Dataset.value`` was removed in h5py 3.0; indexing with an empty
        # tuple reads the whole dataset on both old and new h5py releases.
        return (fsave['dm2aa+ab'][()], fsave['dm2bb+ab'][()])
    else:
        return fsave
Esempio n. 45
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the ``Ioo``, ``Ivv``, ``Ivo`` and ``Xvo`` intermediates.

    Each intermediate is accumulated from contractions of the 1- and
    2-particle density intermediates with the integral blocks held by
    ``eris``.  Before returning, ``Ioo``, ``Ivv`` and ``Ivo`` are negated
    and ``Ivo`` is added into ``Xvo``.

    Args:
        mycc: object providing ``stdout`` and ``verbose``; also passed to the
            helpers that build the default ``eris``, ``d1`` and ``d2``.
        t1: 2D array; its shape defines ``(nocc, nvir)``.
        t2, l1, l2: only forwarded to ``ccsd_rdm.gamma1_intermediates`` /
            ``ccsd_rdm.gamma2_incore`` when ``d1`` / ``d2`` are not given
            (presumably doubles amplitudes and lambda amplitudes -- confirm).
        eris: integral container with attributes ``oooo``, ``ooov``,
            ``oovv``, ``ovov``, ``ovoo``, ``ovvv`` and ``vvvv``; built with
            ``ccsd._ERIS(mycc)`` when ``None``.
        d1: optional pair ``(doo, dvv)``.
        d2: optional 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov,
            dovvv, dooov)``.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        doo, dvv = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    else:
        doo, dvv = d1
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    # NOTE(review): ``log`` is created but not used in this function.
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc,nocc))
    Ivv = numpy.zeros((nvir,nvir))
    Ivo = numpy.zeros((nvir,nocc))
    Xvo = numpy.zeros((nvir,nocc))

    # ---- contributions from doooo and doo ----
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1,0,2,3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc,-1), d_oooo.reshape(nocc,-1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0,2,3,1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1,nvir).T, d_oooo.reshape(nocc,-1).T, 2)
    Xvo +=(numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4
         - numpy.einsum('kj,ikja->ai', doo+doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # ---- contributions from dooov ----
    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1,nvir).T, d_ooov.reshape(-1,nvir))
    Ivo += lib.dot(d_ooov.reshape(-1,nvir).T, eris_oooo.reshape(-1,nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0,1,3,2).reshape(-1,nocc))
    tmpooov = _cp(eris_ooov.transpose(0,1,3,2))
    Ioo += lib.dot(tmpooov.reshape(-1,nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1,nvir).T, tmp)
    eris_oooo = tmp = None

    d_ooov = d_ooov + d_ooov.transpose(1,0,2,3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc,-1), d_ooov.reshape(nocc,-1).T)
    Xvo += lib.dot(eris_ovov.reshape(-1,nvir).T,
                   _cp(d_ooov.transpose(0,2,3,1).reshape(nocc,-1)).T)
    d_ooov = None

    # ---- contributions from doovv ----
    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1,0,3,2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1), d_oovv[i].reshape(nocc,-1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1,nvir).T, d_oovv.reshape(-1,nvir))
    Ivo += lib.dot(d_oovv.reshape(-1,nvir).T, tmpooov.reshape(-1,nocc))
    # From here on d_oovv holds the output of _ccsd.precontract, reshaped to
    # (nocc, nocc, -1); it is consumed by the Xvo update in the ovvv loop.
    d_oovv = _ccsd.precontract(d_oovv.reshape(-1,nvir,nvir)).reshape(nocc,nocc,-1)
    eris_ooov = tmpooov = None

    # ---- contributions from dovov + dovvo ----
    blksize = 4
    d_ovov = numpy.empty((nocc,nvir,nocc,nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0,p1):
            d_ovov[i] += d_ovvo[i-p0].transpose(0,2,1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov,nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1,nvir).T, _cp(eris.ovoo).reshape(-1,nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc,-1), d_ovov.reshape(nocc,-1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1,nvir).T, d_ovov.reshape(-1,nvir))

    # ---- contributions from dovvv, blocked over occupied index ----
    nvir_pair = nvir * (nvir+1) // 2
    bufe_ovvv = numpy.empty((blksize,nvir,nvir,nvir))
    bufc_ovvv = numpy.empty((blksize,nvir,nvir_pair))
    # Rebind bufc_ovvv onto bufe_ovvv's memory: inside the loop c_ovvv is
    # fully consumed before eris_ovvv is unpacked into the same storage.
    # NOTE(review): assigning ndarray.data is reported as deprecated by
    # recent NumPy releases -- confirm against the supported NumPy version.
    bufc_ovvv.data = bufe_ovvv.data
    c_vvvo = numpy.empty((nvir_pair,nvir,nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1-p0,nvir,nvir,nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1-p0):
            lib.dot(dovvv[p0+i].reshape(nvir,-1),
                    eris_oovv[p0+i].reshape(nocc,-1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1-p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1,nvir,nvir), out=c_ovvv)
        # Store the transposed copy in tiles; c_vvvo is used later in the
        # vvvv loop.
        for i0, i1, in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE//(p1-p0)+1):
                c_vvvo[i0:i1,j0:j1,p0:p1] = c_ovvv[:,j0:j1,i0:i1].transpose(2,1,0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1-p0):
            lib.dot(eris_ovx[i].reshape(nvir,-1),
                    d_oovv[p0+i].reshape(nocc,-1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir,-1),
                    c_ovvv[i].reshape(nvir,-1).T, 1, Ivv, 1)

        # Unpack the pair-packed last axis of the integrals into full
        # (nvir, nvir); this overwrites the storage shared with c_ovvv.
        eris_ovvv = bufe_ovvv[:p1-p0]
        _ccsd.unpack_tril(eris_ovx.reshape(-1,nvir_pair),
                          out=eris_ovvv.reshape(-1,nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0,1,3,2))
        lib.dot(eris_ovvv.reshape(-1,nvir).T, d_ovvo.reshape(-1,nocc), 1, Xvo, 1)

        # Reuse the d_ovvo array as scratch space for the transposed integrals.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1-p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0+i])
            e_ovvo[i] = eris_ovov[p0+i].transpose(0,2,1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(d_ovvv.reshape(-1,nvir).T,
                e_ovvo[:p1-p0].reshape(-1,nocc), 1, Ivo, 1)
        lib.dot(eris_ovvv.reshape(-1,nvir).T, d_ovvv.reshape(-1,nvir), 1, Ivv, 1)

        Xvo[:,p0:p1] +=(numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4
                      - numpy.einsum('cb,icba->ai', dvv+dvv.T, eris_ovvv))
    d_oovv = d_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    # ---- contributions from dvvvv, blocked over the first virtual index ----
    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize*nvir,nvir,nocc))
    bufe_vvvv = numpy.empty((blksize*nvir,nvir,nvir))
    bufd_vvvv = numpy.empty((blksize*nvir,nvir,nvir))
    for p0, p1 in prange(0, nvir, blksize):
        # Offsets of rows p0 and p1 in the packed lower-triangular pair index.
        off0 = p0*(p0+1)//2
        off1 = p1*(p1+1)//2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Halve the entries whose packed pair index is a diagonal pair (i, i).
        for i in range(p0, p1):
            d_vvvv[i*(i+1)//2+i-off0] *= .5
        d_vvvv = _ccsd.unpack_tril(d_vvvv, out=bufd_vvvv[:off1-off0])
        eris_vvvv = _ccsd.unpack_tril(eris.vvvv[off0:off1], out=bufe_vvvv[:off1-off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1,nvir).T, d_vvvv.reshape(-1,nvir), 2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(eris_vvvv.reshape(-1,nvir).T, d_vvvo.reshape(-1,nocc), 1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1-off0]
        # Tiled transpose of the eris_ovvv slab into eris_vvvo.
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1, in prange(0, nvir, BLKSIZE//nocc+1):
                eris_vvvo[i0-off0:i1-off0,j0:j1,:] = eris_ovvv[:,j0:j1,i0:i1].transpose(2,1,0)
        lib.dot(eris_vvvo.reshape(-1,nocc).T, d_vvvo.reshape(-1,nocc), 1, Ioo, 1)
        lib.dot(d_vvvv.reshape(-1,nvir).T, eris_vvvo.reshape(-1,nocc), 2, Ivo, 1)

    # Final sign flip; Xvo additionally absorbs the (negated) Ivo.
    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 46
0
def update_amps(mycc, t1, t2, eris):
    """Compute new ``t1`` and ``t2`` amplitude arrays from the current ones.

    The occupied index is processed in slabs of ``blksize``; integral blocks
    are read from ``eris`` slab by slab (partly prefetched on background
    threads) and contracted through five shared scratch buffers.  At the end
    both results are divided by orbital-energy denominators built from the
    diagonal of ``eris.fock``.

    Args:
        mycc: object providing ``stdout``, ``verbose``, ``max_memory``,
            ``direct`` and the method ``add_wvvVV_``.
        t1: 2D array; its shape defines ``(nocc, nvir)``.
        t2: array indexed here as ``t2[i, j]`` with ``(nvir, nvir)`` blocks
            (presumably shape ``(nocc, nocc, nvir, nvir)`` -- confirm).
        eris: integral container with attributes ``fock``, ``oooo``,
            ``ooov``, ``ovoo``, ``oovv``, ``ovov`` and ``ovvv``.

    Returns:
        Tuple ``(t1new, t2new)`` with the shapes of ``t1`` and ``t2``.
    """
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm which
    # interpreter versions this code must support.
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock

    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    # add_wvvVV_ fills a packed lower-triangular (i >= j) occupied-pair array.
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] = t2new_tril[i*(i+1)//2+j]
        # Halve the i == j block; the final symmetrization step adds
        # t2new[i,i].T back onto it.
        t2new[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    # foo / fvv start from the Fock blocks with their diagonals zeroed.
    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij',  -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    # Choose the occupied slab size (blksize) and the virtual sub-slab size
    # (blknvir) from the memory still available.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB,  nocc,nvir = %d,%d  blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2
    # Loader callbacks that copy integral slabs into caller-supplied buffers.
    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        # Loads the virtual sub-slab that FOLLOWS [q0:q1], unless [q0:q1]
        # is already the last one.
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]
    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]
    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    # Five equally sized scratch buffers, reused throughout the slab loop via
    # numpy.ndarray(..., buffer=...) views; the reuse makes the statement
    # order below significant.
    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
    #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            # Double buffering: the first sub-slab is read directly, later
            # ones are taken from the buffer filled by the previous prefetch.
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                readbuf, prefetchbuf = prefetchbuf, readbuf
            # presumably async_do waits for the previous handler and starts
            # prefect_ovvv in the background -- confirm against its definition
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            tmp = t1[:,q0:q1].copy()
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1))

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        # NOTE(review): if the q-loop body never runs (e.g. nvir == 0),
        # handler is still None here and .join() would fail -- confirm that
        # case cannot occur.
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        # Load eris.oovv in the background while the ooov term is contracted.
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        handler.join()
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        # Load eris.ovov in the background while the oovv terms are contracted.
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            # tau's storage is reused for the transposed integrals; theta no
            # longer needs tau at this point.
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = vov = None

    #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
    #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1),
                     tmp.reshape(-1,nvir), -1, t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

    #: wOVov += eris.ovov
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i]  = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            # The product is written into tau's storage, which is free again.
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov), out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1), out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir), axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                 .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    # Denominators eia[i, a] = e_i - e_a from the Fock diagonal.
    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Symmetrize and divide pair by pair on the lower triangle, then mirror
    # the result into the upper triangle.
    # NOTE(review): the counter ``ij`` is incremented but never read.
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i]  = t2new[i,j].T
            ij += 1

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
Esempio n. 47
0
def _rdm2_mo2ao(mycc, d2, mo_coeff, fsave=None):
    '''Transform the two-particle density intermediates ``d2`` from the MO
    basis to the AO basis, working block by block through HDF5 swap storage.

    Args:
        mycc: object supplying ``stdout``, ``verbose`` and ``max_memory``.
        d2: sequence of eight blocks, unpacked as (dovov, dvvvv, doooo, doovv,
            dovvo, dvvov, dovvv, dooov).  ``dvvov`` is not read here.
        mo_coeff: 2D array of shape (nao, nmo); it is converted to Fortran
            order before being handed to the C driver.
        fsave: optional h5py-file-like object.  When given, a dataset named
            'dm2' of shape (nao_pair, nao_pair) is created in it.  When None,
            a temporary file from ``lib.H5TmpFile()`` is used instead.

    Returns:
        If ``fsave`` was None, the content of the 'dm2' dataset read into
        memory; otherwise ``fsave`` itself.
    '''
    # Reference (in-memory) formulation of what the passes below compute:
    # dm2 = ccsd_rdm._make_rdm2(mycc, None, d2, with_dm1=False)
    # dm2 = numpy.einsum('pi,ijkl->pjkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pj,ijkl->ipkl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pk,ijkl->ijpl', mo_coeff, dm2)
    # dm2 = numpy.einsum('pl,ijkl->ijkp', mo_coeff, dm2)
    # dm2 = dm2 + dm2.transpose(1,0,2,3)
    # dm2 = dm2 + dm2.transpose(0,1,3,2)
    # return ao2mo.restore(4, dm2*.5, nmo)
    log = logger.Logger(mycc.stdout, mycc.verbose)
    time1 = time.clock(), time.time()
    if fsave is None:
        incore = True
        fsave = lib.H5TmpFile()
    else:
        incore = False
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = dovov.shape[:2]
    mo_coeff = numpy.asarray(mo_coeff, order='F')
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2

    fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
    ftrans = _ccsd.libcc.AO2MOtranse2_nr_s1
    fmm = _ccsd.libcc.CCmmm_transpose_sum
    pao_loc = ctypes.POINTER(ctypes.c_void_p)()

    def _trans(vin, orbs_slice, out=None):
        # Run the C driver on every row of vin; each row yields nao_pair
        # output elements.  orbs_slice holds the four orbital-range bounds
        # passed to the driver.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv(ftrans, fmm, out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nrow),
             ctypes.c_int(nao), (ctypes.c_int * 4)(*orbs_slice), pao_loc,
             ctypes.c_int(0))
        return out

    # Pass 1: transform the second index pair of dvvvv and store the result
    # transposed, so that pass 3 can read it by AO-pair rows.
    fswap = lib.H5TmpFile()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nvir_pair, max(ccsd.BLKMIN, blksize))
    chunks_vv = (int(min(blksize, 4e8 / blksize)), blksize)
    fswap.create_dataset('v', (nao_pair, nvir_pair), 'f8', chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        fswap['v'][:, p0:p1] = _trans(lib.unpack_tril(_cp(dvvvv[p0:p1])),
                                      (nocc, nmo, nocc, nmo)).T
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # Pass 2:
    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    blksize = int(max_memory * 1e6 / 8 / (nao_pair + nmo**2))
    blksize = min(nao_pair, max(ccsd.BLKMIN, blksize))
    fswap.create_dataset('o', (nmo, nocc, nao_pair),
                         'f8',
                         chunks=(nocc, nocc, blksize))
    # Rows with an occupied leading index: only the oooo and oovv blocks are
    # filled, the rest of the (nmo, nmo) slab stays zero.
    buf1 = numpy.zeros((nocc, nocc, nmo, nmo))
    buf1[:, :, :nocc, :nocc] = doooo
    buf1[:, :, nocc:, nocc:] = _cp(doovv)
    buf1 = _trans(buf1.reshape(nocc**2, -1), (0, nmo, 0, nmo))
    fswap['o'][:nocc] = buf1.reshape(nocc, nocc, nao_pair)
    dovoo = numpy.asarray(dooov).transpose(2, 3, 0, 1)
    # Rows with a virtual leading index, handled nocc virtuals at a time.
    for p0, p1 in lib.prange(nocc, nmo, nocc):
        buf1 = numpy.zeros((nocc, p1 - p0, nmo, nmo))
        buf1[:, :, :nocc, :nocc] = dovoo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, :nocc] = dovvo[:, p0 - nocc:p1 - nocc]
        buf1[:, :, :nocc, nocc:] = dovov[:, p0 - nocc:p1 - nocc]
        buf1[:, :, nocc:, nocc:] = dovvv[:, p0 - nocc:p1 - nocc]
        buf1 = buf1.transpose(1, 0, 3, 2).reshape((p1 - p0) * nocc, -1)
        buf1 = _trans(buf1, (0, nmo, 0, nmo))
        fswap['o'][p0:p1] = buf1.reshape(p1 - p0, nocc, nao_pair)
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)
    dovoo = buf1 = None

    # Pass 3:
    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    gsave = fsave.create_dataset('dm2', (nao_pair, nao_pair),
                                 'f8',
                                 chunks=chunks_vv)
    for p0, p1 in lib.prange(0, nao_pair, blksize):
        buf1 = numpy.zeros((p1 - p0, nmo, nmo))
        buf1[:, nocc:, nocc:] = lib.unpack_tril(_cp(fswap['v'][p0:p1]))
        buf1[:, :, :nocc] = fswap['o'][:, :, p0:p1].transpose(2, 0, 1)
        buf2 = _trans(buf1, (0, nmo, 0, nmo))
        if p0 > 0:
            # Columns p0:p1 of the rows already written hold the untransposed
            # contribution; add the transpose of the new block to them and
            # mirror the sum back into the new block.
            buf1 = _cp(gsave[:p0, p0:p1])
            buf1[:p0, :p1 - p0] += buf2[:p1 - p0, :p0].T
            buf2[:p1 - p0, :p0] = buf1[:p0, :p1 - p0].T
            gsave[:p0, p0:p1] = buf1
        # Diagonal block: symmetrize in place.
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        gsave[p0:p1] = buf2
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    if incore:
        # Dataset.value was removed in h5py 3.0; [()] reads the whole dataset
        # on both old and new h5py versions.
        return fsave['dm2'][()]
    else:
        return fsave
Esempio n. 48
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    '''Accumulate the Ioo, Ivv, Ivo and Xvo intermediates by contracting the
    one- and two-particle density intermediates with the MO integrals.

    Args:
        mycc: object supplying ``stdout``, ``verbose`` and ``max_memory``.
        t1: 2D array of shape (nocc, nvir); here only its shape is read
            directly, the array itself is forwarded to ccsd_rdm when d1 or
            d2 has to be built.
        t2, l1, l2: forwarded to ccsd_rdm when d1 or d2 has to be built.
        eris: integral container with attributes oooo, ooov, ovoo, oovv,
            ovov, ovvv and vvvv.  Built with ``ccsd._ERIS(mycc)`` if None.
        d1: (doo, dov, dvo, dvv).  Built with
            ``ccsd_rdm.gamma1_intermediates`` if None.
        d2: eight two-particle blocks (dovov, dvvvv, doooo, doovv, dovvo,
            dvvov, dovvv, dooov).  If None they are generated into a
            temporary HDF5 file with ``ccsd_rdm.gamma2_outcore``.

    Returns:
        Tuple (Ioo, Ivv, Ivo, Xvo) with shapes (nocc, nocc), (nvir, nvir),
        (nvir, nocc) and (nvir, nocc).  Ioo, Ivv and Ivo are negated before
        returning and the negated Ivo is added into Xvo.
    '''
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        # Generate the two-particle blocks out of core; this branch does not
        # bind dvvov, which is not needed below.
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Swap file holding the transposed blocks written in pass 1 and read back
    # in pass 2, plus the symmetrized 'dovvo' block.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    # ---- oooo / ooov contributions (held fully in memory) ----
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T,
        d_oooo.reshape(nocc, -1).T, 2)
    # One-particle (doo) contribution to Xvo.
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 - numpy.einsum(
        'kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # ---- ovov contributions: d_ovov = dovov + dovvo with its last two axes
    # swapped, then symmetrized as an (nov, nov) matrix ----
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T,
        _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Keep the symmetrized block (last two axes swapped) on disk; it is read
    # back slice by slice at the end of each pass-1 iteration.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # ---- pass 1: loop over blocks of the leading occupied index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        # Add the block with the first two indices exchanged before the
        # remaining ooov contractions.
        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1), d_ooov[i].reshape(
                nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1), d_ooov[i].reshape(
                nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1), d_oovv[i].reshape(
                nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        # From here on d_oovv is the output of _ccsd.precontract, viewed as
        # (p1-p0, nocc, -1); it is contracted with eris_ovx below.
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1), eris_oovv[i].reshape(
                nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        # Written transposed to the swap file so pass 2 can read it by
        # virtual-pair rows.
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            # d_ovvv[i] is overwritten with the result of _ccsd.sum021.
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T, eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        # Expand the packed last index of eris.ovvv into a full
        # (nvir, nvir) pair.
        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir))
        # One-particle (dvv) contribution; only columns p0:p1 of Xvo belong
        # to this block of occupied indices.
        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # ---- pass 2: loop over blocks of the leading virtual index ----
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # NOTE(review): the loop below runs over nvir, but this message reports
    # nocc and a block count derived from nocc -- confirm whether intended.
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of virtual indices p0 and p1 in lower-triangular
        # packed pair storage.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Rows belonging to diagonal pairs (i, i) get half the weight.
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # NOTE(review): eris_vovv is not used anywhere else in this function;
        # eris_vvvo may have been meant here -- confirm.
        eris_vvov = eris_vovv = d_vvvv = None

    # Drop the swap data and close the file; rebinding _tmpfile releases this
    # function's reference to the NamedTemporaryFile object.
    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # The two-particle blocks were generated here, so clean them up too.
        for key in fd2intermediate.keys():
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    # Final sign convention: the three I intermediates are negated, and Xvo
    # absorbs the (already negated) Ivo.
    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 49
0
def _rdm2_mo2ao(mycc, d2, dm1, mo_coeff, fsave=None):
    '''Transform the two-particle density intermediates ``d2``, combined
    with the ``dm1`` terms added below, from the MO basis to the AO basis,
    using HDF5 swap storage.

    Args:
        mycc: object supplying ``stdout``, ``verbose`` and ``max_memory``.
        d2: sequence of eight blocks, unpacked as (dovov, dvvvv, doooo, doovv,
            dovvo, dvvov, dovvv, dooov).  ``dvvov`` is not read here and
            ``dovvv`` is read through ``ao2mo.outcore._load_from_h5g``.
        dm1: array added to the [i, i, :, :] slab and, scaled by -0.5, to the
            [i, :, :, i] slab for every occupied index i.
        mo_coeff: 2D array of shape (nao, nmo); converted to Fortran order
            before being handed to the C driver.
        fsave: optional h5py-file-like object.  When given, a group 'dm2' is
            created in it holding column blocks '0', '1', ... of the
            (nao_pair, nao_pair) result.  When None, a temporary HDF5 file is
            used.

    Returns:
        If ``fsave`` was None, the (nao_pair, nao_pair) result loaded into
        memory; otherwise ``fsave`` itself.
    '''
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if fsave is None:
        _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fsave = h5py.File(_dm2file.name, 'w')
    else:
        _dm2file = None
    time1 = time.clock(), time.time()
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2
    nocc, nvir = dovov.shape[:2]
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nvir_pair = nvir * (nvir + 1) // 2
    mo_coeff = numpy.asarray(mo_coeff, order='F')

    def _trans(vin, orbs_slice, out=None):
        # Run the C driver on every row of vin; each row yields nao_pair
        # output elements.  orbs_slice holds the four orbital-range bounds
        # passed to the driver.
        nrow = vin.shape[0]
        if out is None:
            out = numpy.empty((nrow, nao_pair))
        fdrv = getattr(_ccsd.libcc, 'AO2MOnr_e2_drv')
        pao_loc = ctypes.POINTER(ctypes.c_void_p)()
        fdrv(_ccsd.libcc.AO2MOtranse2_nr_s1, _ccsd.libcc.CCmmm_transpose_sum,
             out.ctypes.data_as(ctypes.c_void_p),
             vin.ctypes.data_as(ctypes.c_void_p),
             mo_coeff.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nrow),
             ctypes.c_int(nao), (ctypes.c_int * 4)(*orbs_slice), pao_loc,
             ctypes.c_int(0))
        return out

    # Pass 1:
    # transform dm2_ij to get lower triangular (dm2+dm2.transpose(0,1,3,2))
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open the swap file explicitly for writing: groups are created in it
    # below, and h5py's default mode is read-only since h5py 3.0.
    fswap = h5py.File(_tmpfile.name, 'w')
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(
        1, int(max_memory * 1e6 / 8 / (nmo * nao_pair + nmo**3 + nvir**3)))
    iobuflen = int(256e6 / 8 / (blksize * nmo))
    log.debug1('_rdm2_mo2ao pass 1: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    fswap.create_group('o')  # for h5py old version
    pool1 = numpy.empty((blksize, nmo, nmo, nmo))
    pool2 = numpy.empty((blksize, nmo, nao_pair))
    bufd_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        # Assemble the (nmo, nmo, nmo) slab for each occupied leading index
        # from the individual density blocks; the two blocks that have no
        # source are zeroed explicitly because pool1 is uninitialized.
        buf1 = pool1[:p1 - p0]
        buf1[:, :nocc, :nocc, :nocc] = doooo[p0:p1]
        buf1[:, :nocc, :nocc, nocc:] = dooov[p0:p1]
        buf1[:, :nocc, nocc:, :nocc] = 0
        buf1[:, :nocc, nocc:, nocc:] = doovv[p0:p1]
        buf1[:, nocc:, :nocc, :nocc] = 0
        buf1[:, nocc:, :nocc, nocc:] = dovov[p0:p1]
        buf1[:, nocc:, nocc:, :nocc] = dovvo[p0:p1]
        d_ovvv = bufd_ovvv[:p1 - p0]
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        buf1[:, nocc:, nocc:, nocc:] = d_ovvv
        # Add the dm1 terms for each occupied index i of this block.
        for i in range(p0, p1):
            buf1[i - p0, i, :, :] += dm1
            buf1[i - p0, :, :, i] -= dm1 * .5
        buf2 = pool2[:p1 - p0].reshape(-1, nao_pair)
        _trans(buf1.reshape(-1, nmo**2), (0, nmo, 0, nmo), buf2)
        ao2mo.outcore._transpose_to_h5g(fswap, 'o/%d' % istep, buf2, iobuflen)
    pool1 = pool2 = bufd_ovvv = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 1', *time1)

    # Pass 2: same transformation for the packed dvvvv block.
    fswap.create_group('v')  # for h5py old version
    pool1 = numpy.empty((blksize * nvir, nao_pair))
    pool2 = numpy.empty((blksize * nvir, nvir, nvir))
    for istep, (p0, p1) in enumerate(prange(0, nvir_pair, blksize * nvir)):
        buf1 = _cp(dvvvv[p0:p1])
        buf2 = lib.unpack_tril(buf1, out=pool2[:p1 - p0])
        buf1 = _trans(buf2, (nocc, nmo, nocc, nmo), out=pool1[:p1 - p0])
        ao2mo.outcore._transpose_to_h5g(fswap, 'v/%d' % istep, buf1, iobuflen)
    pool1 = pool2 = None
    time1 = log.timer_debug1('_rdm2_mo2ao pass 2', *time1)

    # Pass 3:
    # transform dm2_kl then dm2 + dm2.transpose(2,3,0,1)
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(nao, int(max_memory * 1e6 / 8 / (nao_pair + nmo**2)))
    iobuflen = int(256e6 / 8 / blksize)
    log.debug1('_rdm2_mo2ao pass 3: blksize = %d, iobuflen = %d', blksize,
               iobuflen)
    gsave = fsave.create_group('dm2')
    # One dataset per column block; all of them have nao_pair rows.
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        gsave.create_dataset(str(istep), (nao_pair, p1 - p0), 'f8')
    # Packed lower-triangular positions of the diagonal AO pairs (i, i).
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    pool1 = numpy.empty((blksize, nmo, nmo))
    pool2 = numpy.empty((blksize, nvir_pair))
    pool3 = numpy.empty((blksize, nvir, nvir))
    pool4 = numpy.empty((blksize, nao_pair))
    for istep, (p0, p1) in enumerate(prange(0, nao_pair, blksize)):
        buf1 = pool1[:p1 - p0]
        ao2mo.outcore._load_from_h5g(fswap['o'], p0, p1,
                                     buf1[:, :nocc].reshape(p1 - p0, -1))
        buf2 = ao2mo.outcore._load_from_h5g(fswap['v'], p0, p1,
                                            pool2[:p1 - p0])
        buf3 = lib.unpack_tril(buf2, out=pool3[:p1 - p0])
        buf1[:, nocc:, nocc:] = buf3
        buf1[:, nocc:, :nocc] = 0
        buf2 = _trans(buf1, (0, nmo, 0, nmo), out=pool4[:p1 - p0])
        idx = diagidx[diagidx < p1]
        if p0 > 0:
            # Rows above this block already hold their untransposed part in
            # columns p0:p1; add the transpose of the new block tile by tile
            # and mirror the sum back into the new block.
            buf1 = _cp(gsave[str(istep)][:p0])
            for i0, i1 in prange(0, p1 - p0, BLKSIZE):
                for j0, j1 in prange(0, p0, BLKSIZE):
                    buf1[j0:j1, i0:i1] += buf2[i0:i1, j0:j1].T
                    buf2[i0:i1, j0:j1] = buf1[j0:j1, i0:i1].T
            # Halve the diagonal-pair columns that fall inside this block.
            buf1[:, idx[p0 <= idx] - p0] *= .5
            gsave[str(istep)][:p0] = buf1
        # Diagonal block: symmetrize in place.
        lib.transpose_sum(buf2[:, p0:p1], inplace=True)
        buf2[:, idx] *= .5
        for ic, (i0, i1) in enumerate(prange(0, nao_pair, blksize)):
            gsave[str(ic)][p0:p1] = buf2[:, i0:i1]
    time1 = log.timer_debug1('_rdm2_mo2ao pass 3', *time1)
    del (fswap['o'])
    del (fswap['v'])
    fswap.close()
    _tmpfile = None
    time1 = log.timer_debug1('_rdm2_mo2ao cleanup', *time1)
    if _dm2file is not None:
        # The 'dm2' group holds nao_pair rows and nao_pair columns in total
        # (see the create_dataset loop above), so the in-memory copy has to be
        # (nao_pair, nao_pair), not (nvir_pair, nvir_pair).
        dm2 = numpy.empty((nao_pair, nao_pair))
        ao2mo.outcore._load_from_h5g(fsave['dm2'], 0, nao_pair, dm2)
        fsave.close()
        _dm2file = None
        return dm2
    else:
        return fsave
Esempio n. 50
0
def get_eri(mydf, kpts=None, compact=True):
    '''Assemble the two-electron integrals for the k-points ``kpts``: start
    from ``pwdf_ao2mo.get_eri`` and add the contributions produced by
    ``mydf.sr_loop``.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called first when
            ``mydf._cderi`` is None.
        kpts: passed through ``_format_kpts`` to obtain four k-points.
        compact: only consulted in the gamma-point branch.  When false, the
            result is expanded with ``ao2mo.restore(1, ...)`` and reshaped to
            (nao**2, -1).

    Returns:
        The integral array.  The two branches away from the gamma point add
        a ``*1j`` term and return after in-place addition into the array
        obtained from ``pwdf_ao2mo.get_eri``.
    '''
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    eri = pwdf_ao2mo.get_eri(mydf, kptijkl, compact=True)
    nao = mydf.cell.nao_nr()
    nao2 = nao*nao
    mem_avail = mydf.max_memory - lib.current_memory()[0] - nao**4*8/1e6
    max_memory = max(2000, mem_avail * .8)

    # Case 1 -- gamma point: the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        # Only half is accumulated here; transpose_sum below adds the
        # transposed counterpart.
        eri *= .5
        for auxR, auxI, fitR, fitI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(fitR.T, auxR, 1, eri, 1)
            # Drop the block references before the next block is fetched.
            del auxR, auxI, fitR, fitI
        eri = lib.transpose_sum(eri, inplace=True)
        if compact:
            return eri
        return ao2mo.restore(1, eri, nao).reshape(nao**2,-1)

    # Case 2 -- kpti == kptl and kptj == kptk: complex integrals with N^4
    # elements.  Real and imaginary parts are accumulated separately.
    if (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        real_part = numpy.zeros((nao2, nao2))
        imag_part = numpy.zeros((nao2, nao2))
        for auxR, auxI, fitR, fitI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(fitR.T, fitI.T, auxR, auxI, 1, real_part, imag_part, 1)
            del auxR, auxI, fitR, fitI
        # eri == eri.transpose(3,2,1,0).conj(): add the transpose to the real
        # part and subtract it from the imaginary part.
        real_part = lib.transpose_sum(real_part, inplace=True)
        scratch = lib.transpose(imag_part)
        imag_part -= scratch

        # Swap the last two indices of each part; the scratch array is reused
        # as the output buffer for both transposes.
        real_part = lib.transpose(real_part.reshape(-1,nao,nao), axes=(0,2,1),
                                  out=scratch)
        eri += real_part.reshape(eri.shape)
        imag_part = lib.transpose(imag_part.reshape(-1,nao,nao), axes=(0,2,1),
                                  out=scratch)
        eri += imag_part.reshape(eri.shape)*1j
        return eri

    # Case 3 -- general k-points: aosym = s1, complex integrals.
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis,
    # 0 < k/b < 1, by definition.  So  kptl/b - kptk/b  must be -1 < k/b < 1.
    real_part = numpy.zeros((nao2, nao2))
    imag_part = numpy.zeros((nao2, nao2))
    # Two loops run side by side, so each one gets half of the memory.
    max_memory *= .5
    bra_blocks = mydf.sr_loop(kptijkl[:2], max_memory, False)
    ket_blocks = mydf.sr_loop(kptijkl[2:], max_memory, False)
    for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
            lib.izip(bra_blocks, ket_blocks):
        zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, real_part, imag_part, 1)
        zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, real_part, imag_part, 1)
        del LpqR, LpqI, jpqR, jpqI, LrsR, LrsI, jrsR, jrsI
    eri += real_part
    eri += imag_part*1j
    return eri