Esempio n. 1
0
def kernel(mycc,
           t1=None,
           t2=None,
           l1=None,
           l2=None,
           eris=None,
           atmlst=None,
           mf_grad=None,
           verbose=logger.INFO):
    """Compute the CCSD nuclear gradients for the atoms listed in ``atmlst``.

    Args:
        mycc: CCSD object.  Its ``t1``, ``t2``, ``l1``, ``l2``, ``mo_coeff``,
            ``frozen``, ``mol``, ``_scf``, ``max_memory``, ``stdout`` and
            ``verbose`` attributes are read here.
        t1, t2, l1, l2: amplitudes and lambda amplitudes; each defaults to
            the attribute of the same name on ``mycc``.
        eris: MO integrals object; defaults to ``ccsd._ERIS(mycc)``.
        atmlst: iterable of atom indices; defaults to every atom of
            ``mycc.mol``.
        mf_grad: mean-field gradient object; defaults to
            ``rhf_grad.Gradients(mycc._scf)``.
        verbose: kept for interface compatibility.  It is not read; the
            logger is built from ``mycc.verbose``.

    Returns:
        numpy array of shape ``(len(atmlst), 3)``; row ``k`` holds the
        x, y, z gradient components of atom ``atmlst[k]``.

    Raises:
        NotImplementedError: if ``mycc.frozen`` is an integer (of any value).
    """
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2
    if l1 is None:
        l1 = mycc.l1
    if l2 is None:
        l2 = mycc.l2
    if eris is None:
        eris = ccsd._ERIS(mycc)
    if mf_grad is None:
        mf_grad = rhf_grad.Gradients(mycc._scf)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; prefer process_time() when the
    # interpreter provides it and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    mol = mycc.mol
    # The numpy.bool alias was removed in NumPy 1.24; the builtin bool is
    # the portable spelling of the same dtype.
    moidx = numpy.ones(mycc.mo_coeff.shape[1], dtype=bool)
    if isinstance(mycc.frozen, (int, numpy.integer)):
        raise NotImplementedError('frozen orbital ccsd_grad')
    moidx[mycc.frozen] = False
    mo_coeff = mycc.mo_coeff[:, moidx]  #FIXME: ensure mycc.mo_coeff is canonical orbital
    mo_energy = eris.fock.diagonal()
    nocc, nvir = t1.shape
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    log.debug('Build ccsd rdm1 intermediates')
    d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer('rdm1 intermediates', *time0)

    log.debug('Build ccsd rdm2 intermediates')
    # The rdm2 intermediates are written to a temporary HDF5 file.
    _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
    d2 = ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
    time1 = log.timer('rdm2 intermediates', *time1)
    log.debug('Build ccsd response_rdm1')
    Ioo, Ivv, Ivo, Xvo = IX_intermediates(mycc, t1, t2, l1, l2, eris, d1, d2)
    time1 = log.timer('response_rdm1 intermediates', *time1)

    dm1mo = response_dm1(mycc, t1, t2, l1, l2, eris, (Ioo, Ivv, Ivo, Xvo))
    # Overwrite the oo and vv blocks with the symmetrized rdm1 intermediates.
    dm1mo[:nocc, :nocc] = doo + doo.T
    dm1mo[nocc:, nocc:] = dvv + dvv.T
    dm1ao = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # Assemble the I intermediate in the MO basis, then transform to AO.
    im1 = numpy.zeros_like(dm1mo)
    im1[:nocc, :nocc] = Ioo
    im1[nocc:, nocc:] = Ivv
    im1[nocc:, :nocc] = Ivo
    im1[:nocc, nocc:] = Ivo.T
    im1 = reduce(numpy.dot, (mo_coeff, im1, mo_coeff.T))
    time1 = log.timer('response_rdm1', *time1)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Basically, 4 times of dm2 is computed. *2 in _rdm2_mo2ao, *2 in _load_block_tril
    fdm2 = h5py.File(_dm2file.name, 'w')
    dm1_with_hf = dm1mo.copy()
    # HF 2pdm ~ 4(ij)(kl)-2(il)(jk), diagonal+1 because of 4*dm2
    for i in range(nocc):
        dm1_with_hf[i, i] += 1
    _rdm2_mo2ao(mycc, d2, dm1_with_hf, mo_coeff, fdm2)
    time1 = log.timer('MO->AO transformation', *time1)
    # The MO-basis rdm2 intermediates are no longer needed once fdm2 is built.
    for key in fd2intermediate.keys():
        del (fd2intermediate[key])
    fd2intermediate.close()

    #TODO: pass hf_grad object to compute h1 and s1
    log.debug('h1 and JK1')
    h1 = mf_grad.get_hcore(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    # The vo and ov blocks use only the occupied orbital energy.
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))
    # AO-basis product of the occupied MO coefficients with their transpose.
    # Named occ_proj so it is not confused with the AO offset p1 used below.
    occ_proj = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf4sij = reduce(numpy.dot,
                     (occ_proj, mycc._scf.get_veff(mol, dm1ao + dm1ao.T),
                      occ_proj))
    time1 = log.timer('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    hf_dm1 = mycc._scf.make_rdm1(mycc._scf.mo_coeff, mycc._scf.mo_occ)
    dm1ao += hf_dm1
    zeta += mf_grad.make_rdm1e(mycc._scf.mo_energy, mycc._scf.mo_coeff,
                               mycc._scf.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(1, int(max_memory * 1e6 / 8 / (nao**3 * 2.5)))
    ioblksize = fdm2['dm2/0'].shape[-1]
    de = numpy.zeros((len(atmlst), 3))
    for k, ia in enumerate(atmlst):
        # Shell range and AO range belonging to atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] = (numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1]) +
                 numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1]))
        # h[1] \dot DM, *2 for +c.c.,  contribute to f1
        h1ao = mf_grad._grad_rinv(mol, ia)
        h1ao[:, p0:p1] += h1[:, p0:p1]
        de[k] += (numpy.einsum('xij,ij->x', h1ao, dm1ao) +
                  numpy.einsum('xji,ij->x', h1ao, dm1ao))
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= (numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) +
                  numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1]))
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf4sij[p0:p1]) * 2

        # 2e AO integrals dot 2pdm
        ip0 = p0
        for b0, b1, nf in shell_prange(mol, shl0, shl1, blksize):
            eri1 = mol.intor('cint2e_ip1_sph',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=(b0, b1, 0, mol.nbas, 0, mol.nbas, 0,
                                         mol.nbas))
            eri1 = eri1.reshape(3, nf, nao, -1)
            dm2buf = numpy.empty((nf, nao, nao_pair))
            for ic, (i0, i1) in enumerate(prange(0, nao_pair, ioblksize)):
                _load_block_tril(fdm2['dm2/%d' % ic], ip0, ip0 + nf,
                                 dm2buf[:, :, i0:i1])
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            # Drop the large buffers before the next shell block is allocated.
            eri1 = dm2buf = None
            ip0 += nf
        log.debug('grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer('grad of atom %d' % ia, *time1)

    log.note('CCSD gradients')
    log.note('==============')
    log.note('           x                y                z')
    for k, ia in enumerate(atmlst):
        log.note('%d %s  %15.9f  %15.9f  %15.9f', ia, mol.atom_symbol(ia),
                 de[k, 0], de[k, 1], de[k, 2])
    log.timer('CCSD gradients', *time0)
    for key in fdm2.keys():
        del (fdm2[key])
    fdm2.close()
    # Dropping the references lets the temporary files be cleaned up.
    _d2tmpfile = _dm2file = None
    return de
Esempio n. 2
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the Ioo, Ivv, Ivo and Xvo intermediates.

    Each intermediate is accumulated from contractions of the density-matrix
    intermediates (``d1``, ``d2``) with blocks of the MO two-electron
    integrals in ``eris``.  Large blocks are processed in slices sized from
    ``mycc.max_memory``; transposed copies needed by the second pass are
    staged in a temporary HDF5 swap file that is emptied before returning.

    Args:
        mycc: CCSD object.  ``stdout``, ``verbose`` and ``max_memory`` are
            read here, and the object is forwarded to the ``ccsd`` and
            ``ccsd_rdm`` helpers.
        t1: only ``t1.shape`` is used directly, giving ``(nocc, nvir)``.
        t2, l1, l2: forwarded to the ``ccsd_rdm`` helpers when ``d1`` or
            ``d2`` is not supplied.
        eris: integrals object exposing the ``oooo``, ``ooov``, ``ovoo``,
            ``ovov``, ``oovv``, ``ovvv`` and ``vvvv`` blocks; built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: ``(doo, dov, dvo, dvv)``; only ``doo`` and ``dvv`` are used
            here.  Computed with ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``;
            ``dvvov`` is not used here.  When None the intermediates are
            generated into a temporary HDF5 file that is emptied and closed
            before returning.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  ``Ioo``,
        ``Ivv`` and ``Ivo`` are the accumulated contractions multiplied by
        -1; the returned ``Xvo`` has that negated ``Ivo`` added to it.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        # Read the datasets back by name from the temporary file.
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Swap file for the transposed blocks written in pass 1 and read in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T,
        d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # Assemble dovov + dovvo (last two indices swapped) in memory, reading
    # a few occupied indices at a time.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T,
        _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Keep the ovvo-ordered copy on disk; it is re-read per block in pass 1.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # Pass 1: loop over blocks of the leading occupied index.
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, (nocc + blksize - 1) // blksize)
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            # lib.dot(a, b, 1, out, 1) is used here to accumulate into out.
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        # eris.ovvv is reshaped with a trailing dimension of nvir_pair here
        # and expanded with lib.unpack_tril further down.
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T, eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # Pass 2: loop over blocks of the virtual index; rows off0:off1 are the
    # lower-triangular pair rows that belong to virtual indices p0:p1.
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # This loop runs over nvir, so report nvir and its block count.
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, (nvir + blksize - 1) // blksize)
    for p0, p1 in prange(0, nvir, blksize):
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Halve the rows at the diagonal pair positions (i, i).
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # Release every per-block buffer before the next block is loaded.
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # Only clean up the rdm2 file when it was created in this function.
        for key in fd2intermediate.keys():
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 3
0
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the Ioo, Ivv, Ivo and Xvo intermediates.

    Each intermediate is accumulated from contractions of the density-matrix
    intermediates (``d1``, ``d2``) with blocks of the MO two-electron
    integrals in ``eris``.  Large blocks are processed in slices sized from
    ``mycc.max_memory``; transposed copies needed by the second pass are
    staged in a temporary HDF5 swap file that is emptied before returning.

    Args:
        mycc: CCSD object.  ``stdout``, ``verbose`` and ``max_memory`` are
            read here, and the object is forwarded to the ``ccsd`` and
            ``ccsd_rdm`` helpers.
        t1: only ``t1.shape`` is used directly, giving ``(nocc, nvir)``.
        t2, l1, l2: forwarded to the ``ccsd_rdm`` helpers when ``d1`` or
            ``d2`` is not supplied.
        eris: integrals object exposing the ``oooo``, ``ooov``, ``ovoo``,
            ``ovov``, ``oovv``, ``ovvv`` and ``vvvv`` blocks; built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: ``(doo, dov, dvo, dvv)``; only ``doo`` and ``dvv`` are used
            here.  Computed with ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``;
            ``dvvov`` is not used here.  When None the intermediates are
            generated into a temporary HDF5 file that is emptied and closed
            before returning.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  ``Ioo``,
        ``Ivv`` and ``Ivo`` are the accumulated contractions multiplied by
        -1; the returned ``Xvo`` has that negated ``Ivo`` added to it.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        # Read the datasets back by name from the temporary file.
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    # Swap file for the transposed blocks written in pass 1 and read in pass 2.
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(
        eris_ooov.reshape(-1, nvir).T,
        d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    # Assemble dovov + dovvo (last two indices swapped) in memory, reading
    # a few occupied indices at a time.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(
        d_ovov.reshape(-1, nvir).T,
        _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Keep the ovvo-ordered copy on disk; it is re-read per block in pass 1.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    # Pass 1: loop over blocks of the leading occupied index.
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, (nocc + blksize - 1) // blksize)
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            # lib.dot(a, b, 1, out, 1) is used here to accumulate into out.
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        # eris.ovvv is reshaped with a trailing dimension of nvir_pair here
        # and expanded with lib.unpack_tril further down.
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T, eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
                          numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(
            eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    # Pass 2: loop over blocks of the virtual index; rows off0:off1 are the
    # lower-triangular pair rows that belong to virtual indices p0:p1.
    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # This loop runs over nvir, so report nvir and its block count.
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, (nvir + blksize - 1) // blksize)
    for p0, p1 in prange(0, nvir, blksize):
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Halve the rows at the diagonal pair positions (i, i).
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(
            eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(
            d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(
            eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # Release every per-block buffer before the next block is loaded.
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # Only clean up the rdm2 file when it was created in this function.
        for key in fd2intermediate.keys():
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
Esempio n. 4
0
def kernel(mycc,
           t1=None,
           t2=None,
           l1=None,
           l2=None,
           eris=None,
           atmlst=None,
           mf_grad=None,
           verbose=logger.INFO):
    """Compute the CCSD nuclear gradients for the atoms listed in ``atmlst``.

    Args:
        mycc: CCSD object.  Its ``t1``, ``t2``, ``l1``, ``l2``, ``mo_coeff``,
            ``mo_energy``, ``mo_occ``, ``frozen``, ``mol``, ``_scf``,
            ``max_memory``, ``stdout`` and ``verbose`` attributes are read
            here.
        t1, t2, l1, l2: amplitudes and lambda amplitudes; each defaults to
            the attribute of the same name on ``mycc``.
        eris: MO integrals object; defaults to ``ccsd._ERIS(mycc)``.
        atmlst: iterable of atom indices; defaults to every atom of
            ``mycc.mol``.
        mf_grad: mean-field gradient object; defaults to
            ``rhf_grad.Gradients(mycc._scf)``.
        verbose: kept for interface compatibility.  It is not read; the
            logger is built from ``mycc.verbose``.

    Returns:
        numpy array of shape ``(len(atmlst), 3)``; row ``k`` holds the
        x, y, z gradient components of atom ``atmlst[k]``.

    Raises:
        NotImplementedError: if ``mycc.frozen`` is an integer (of any value).
    """
    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2
    if l1 is None:
        l1 = mycc.l1
    if l2 is None:
        l2 = mycc.l2
    if eris is None:
        eris = ccsd._ERIS(mycc)
    if mf_grad is None:
        mf_grad = rhf_grad.Gradients(mycc._scf)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; prefer process_time() when the
    # interpreter provides it and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    mol = mycc.mol
    # The numpy.bool alias was removed in NumPy 1.24; the builtin bool is
    # the portable spelling of the same dtype.
    moidx = numpy.ones(mycc.mo_energy.size, dtype=bool)
    if isinstance(mycc.frozen, (int, numpy.integer)):
        raise NotImplementedError('frozen orbital ccsd_grad')
    moidx[mycc.frozen] = False
    mo_coeff = mycc.mo_coeff[:, moidx]  #FIXME: ensure mycc.mo_coeff is canonical orbital
    mo_energy = mycc.mo_energy[moidx]
    nocc, nvir = t1.shape
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    log.debug('Build ccsd rdm1 intermediates')
    d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer('rdm1 intermediates', *time0)

    log.debug('Build ccsd rdm2 intermediates')
    # The rdm2 intermediates are written to a temporary HDF5 file.
    _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
    d2 = ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
    time1 = log.timer('rdm2 intermediates', *time1)
    log.debug('Build ccsd response_rdm1')
    Ioo, Ivv, Ivo, Xvo = IX_intermediates(mycc, t1, t2, l1, l2, eris, d1, d2)
    time1 = log.timer('response_rdm1 intermediates', *time1)

    dm1mo = response_dm1(mycc, t1, t2, l1, l2, eris, (Ioo, Ivv, Ivo, Xvo))
    # Overwrite the oo and vv blocks with the symmetrized rdm1 intermediates.
    dm1mo[:nocc, :nocc] = doo + doo.T
    dm1mo[nocc:, nocc:] = dvv + dvv.T
    dm1ao = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # Assemble the I intermediate in the MO basis, then transform to AO.
    im1 = numpy.zeros_like(dm1mo)
    im1[:nocc, :nocc] = Ioo
    im1[nocc:, nocc:] = Ivv
    im1[nocc:, :nocc] = Ivo
    im1[:nocc, nocc:] = Ivo.T
    im1 = reduce(numpy.dot, (mo_coeff, im1, mo_coeff.T))
    time1 = log.timer('response_rdm1', *time1)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    _dm2file = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Basically, 4 times of dm2 is computed. *2 in _rdm2_mo2ao, *2 in _load_block_tril
    fdm2 = h5py.File(_dm2file.name, 'w')
    dm1_with_hf = dm1mo.copy()
    # HF 2pdm ~ 4(ij)(kl)-2(il)(jk), diagonal+1 because of 4*dm2
    for i in range(nocc):
        dm1_with_hf[i, i] += 1
    _rdm2_mo2ao(mycc, d2, dm1_with_hf, mo_coeff, fdm2)
    time1 = log.timer('MO->AO transformation', *time1)
    # The MO-basis rdm2 intermediates are no longer needed once fdm2 is built.
    for key in fd2intermediate.keys():
        del (fd2intermediate[key])
    fd2intermediate.close()

    #TODO: pass hf_grad object to compute h1 and s1
    log.debug('h1 and JK1')
    h1 = mf_grad.get_hcore(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    # The vo and ov blocks use only the occupied orbital energy.
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))
    # AO-basis product of the occupied MO coefficients with their transpose.
    # Named occ_proj so it is not confused with the AO offset p1 used below.
    occ_proj = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf4sij = reduce(numpy.dot,
                     (occ_proj, mycc._scf.get_veff(mol, dm1ao + dm1ao.T),
                      occ_proj))
    time1 = log.timer('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)
    dm1ao += hf_dm1
    zeta += mf_grad.make_rdm1e(mycc.mo_energy, mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    max_memory = mycc.max_memory - lib.current_memory()[0]
    blksize = max(1, int(max_memory * 1e6 / 8 / (nao**3 * 2.5)))
    ioblksize = fdm2['dm2/0'].shape[-1]
    de = numpy.zeros((len(atmlst), 3))
    for k, ia in enumerate(atmlst):
        # Shell range and AO range belonging to atom ia.
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] = (numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1]) +
                 numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1]))
        # h[1] \dot DM, *2 for +c.c.,  contribute to f1
        h1ao = mf_grad._grad_rinv(mol, ia)
        h1ao[:, p0:p1] += h1[:, p0:p1]
        de[k] += (numpy.einsum('xij,ij->x', h1ao, dm1ao) +
                  numpy.einsum('xji,ij->x', h1ao, dm1ao))
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= (numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) +
                  numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1]))
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf4sij[p0:p1]) * 2

        # 2e AO integrals dot 2pdm
        ip0 = p0
        for b0, b1, nf in shell_prange(mol, shl0, shl1, blksize):
            eri1 = mol.intor(
                'cint2e_ip1_sph',
                comp=3,
                aosym='s2kl',
                shls_slice=(b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas))
            eri1 = eri1.reshape(3, nf, nao, -1)
            dm2buf = numpy.empty((nf, nao, nao_pair))
            for ic, (i0, i1) in enumerate(prange(0, nao_pair, ioblksize)):
                _load_block_tril(fdm2['dm2/%d' % ic], ip0, ip0 + nf,
                                 dm2buf[:, :, i0:i1])
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            # Drop the large buffers before the next shell block is allocated.
            eri1 = dm2buf = None
            ip0 += nf
        log.debug('grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer('grad of atom %d' % ia, *time1)

    log.note('CCSD gradients')
    log.note('==============')
    log.note('           x                y                z')
    for k, ia in enumerate(atmlst):
        log.note('%d %s  %15.9f  %15.9f  %15.9f', ia, mol.atom_symbol(ia),
                 de[k, 0], de[k, 1], de[k, 2])
    log.timer('CCSD gradients', *time0)
    for key in fdm2.keys():
        del (fdm2[key])
    fdm2.close()
    # Dropping the references lets the temporary files be cleaned up.
    _d2tmpfile = _dm2file = None
    return de
Esempio n. 5
0
def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           grad_hf=None, max_memory=2000, verbose=logger.INFO):
    '''Evaluate the CCSD nuclear gradients for the atoms in ``atmlst``.

    Args:
        mycc: CCSD object.  Its ``t1``, ``t2``, ``l1``, ``l2``, ``mo_coeff``,
            ``mo_energy``, ``mo_occ``, ``frozen``, ``mol``, ``stdout``,
            ``verbose`` and ``_scf`` attributes are read here.
        t1, t2, l1, l2: amplitudes; each one defaults to the attribute of
            the same name on ``mycc``.
        eris: integral object; defaults to ``ccsd._ERIS(mycc)``.
        atmlst: indices of the atoms to differentiate; defaults to
            ``range(mol.natm)``.
        grad_hf: Hartree-Fock gradient object providing ``get_hcore``,
            ``get_ovlp``, ``make_rdm1e``, ``aorange_by_atom`` and
            ``_grad_rinv``; defaults to ``pyscf.grad.hf.RHF(mycc._scf)``.
        max_memory: memory budget forwarded to the rdm helpers and used to
            size the shell blocks of the 2e-integral loop (presumably in MB,
            since it is scaled by 1e6 bytes below -- confirm with callers).
        verbose: accepted for interface compatibility; it is not referenced
            in the body (the logger is built from ``mycc.verbose``).

    Returns:
        numpy array of shape ``(len(atmlst), 3)``; row ``k`` holds the x, y, z
        components for atom ``atmlst[k]``.

    Raises:
        NotImplementedError: if ``mycc.frozen`` is an integer.
    '''
    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if l1 is None: l1 = mycc.l1
    if l2 is None: l2 = mycc.l2
    if eris is None: eris = ccsd._ERIS(mycc)
    if grad_hf is None:
        grad_hf = pyscf.grad.hf.RHF(mycc._scf)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    mol = mycc.mol
    # Builtin bool instead of the numpy.bool alias (removed in NumPy 1.24).
    moidx = numpy.ones(mycc.mo_energy.size, dtype=bool)
    if isinstance(mycc.frozen, (int, numpy.integer)):
        raise NotImplementedError('frozen orbital ccsd_grad')
    moidx[mycc.frozen] = False
    mo_coeff = mycc.mo_coeff[:,moidx]  #FIXME: ensure mycc.mo_coeff is canonical orbital
    mo_energy = mycc.mo_energy[moidx]
    nocc, nvir = t1.shape
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2

    log.debug('Build ccsd rdm1 intermediates')
    doo, dvv = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2, max_memory)
    time1 = log.timer('rdm1 intermediates', *time0)

    log.debug('Build ccsd rdm2 intermediates')
    # The rdm2 intermediates are written to a temporary HDF5 file.
    _d2tmpfile = tempfile.NamedTemporaryFile()
    fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
    d2 = ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate, max_memory)
    time1 = log.timer('rdm2 intermediates', *time1)
    log.debug('Build ccsd response_rdm1')
    Ioo, Ivv, Ivo, Xvo = IX_intermediates(mycc, t1, t2, l1, l2, eris, (doo,dvv),
                                          d2, max_memory)
    time1 = log.timer('response_rdm1 intermediates', *time1)

    dm1mo = response_dm1(mycc, t1, t2, l1, l2, eris, (Ioo, Ivv, Ivo, Xvo))
    dm1mo[:nocc,:nocc] = doo * 2
    dm1mo[nocc:,nocc:] = dvv * 2
    dm1ao = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # Assemble the I intermediate in the MO basis, then transform to AO.
    im1 = numpy.zeros_like(dm1mo)
    im1[:nocc,:nocc] = Ioo
    im1[nocc:,nocc:] = Ivv
    im1[nocc:,:nocc] = Ivo
    im1[:nocc,nocc:] = Ivo.T
    im1 = reduce(numpy.dot, (mo_coeff, im1, mo_coeff.T))
    time1 = log.timer('response_rdm1', *time1)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    _dm2file = tempfile.NamedTemporaryFile()
    fdm2 = h5py.File(_dm2file.name, 'w')
    _rdm2_mo2ao(mycc, d2, dm1mo, mo_coeff, fdm2, max_memory)
    time1 = log.timer('MO->AO transformation', *time1)
    # The MO-basis intermediates are no longer needed once fdm2 is filled.
    for key in fd2intermediate.keys():
        del(fd2intermediate[key])
    fd2intermediate.close()

    #TODO: pass hf_grad object to compute h1 and s1
    log.debug('h1 and JK1')
    h1 = grad_hf.get_hcore(mol)
    s1 = grad_hf.get_ovlp(mol)
    zeta = numpy.empty((nmo,nmo))
    zeta[:nocc,:nocc] = (mo_energy[:nocc].reshape(-1,1) + mo_energy[:nocc]) * .5
    zeta[nocc:,nocc:] = (mo_energy[nocc:].reshape(-1,1) + mo_energy[nocc:]) * .5
    zeta[nocc:,:nocc] = mo_energy[:nocc]
    zeta[:nocc,nocc:] = mo_energy[:nocc].reshape(-1,1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta*dm1mo, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:,:nocc], mo_coeff[:,:nocc].T)
    vhf4sij = reduce(numpy.dot, (p1, mycc._scf.get_veff(mol, dm1ao+dm1ao.T), p1))
    time1 = log.timer('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)
    dm1ao += hf_dm1
    zeta += grad_hf.make_rdm1e(mycc.mo_energy, mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = grad_hf.aorange_by_atom()
    max_memory1 = max_memory - lib.current_memory()[0]
    # Always process at least one shell block, even if the budget is used up.
    blksize = max(1, int(max_memory1*1e6/8/(nao**3*2.5)))
    ioblksize = fdm2['dm2/0'].shape[-1]
    de = numpy.zeros((len(atmlst),3))
    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] =(numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
              + numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1]))
        # h[1] \dot DM, *2 for +c.c.,  contribute to f1
        vrinv = grad_hf._grad_rinv(mol, ia)
        de[k] +=(numpy.einsum('xij,ij->x', h1[:,p0:p1], dm1ao[p0:p1]  )
               + numpy.einsum('xji,ij->x', h1[:,p0:p1], dm1ao[:,p0:p1]))
        de[k] +=(numpy.einsum('xij,ij->x', vrinv, dm1ao)
               + numpy.einsum('xji,ij->x', vrinv, dm1ao))
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -=(numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
               + numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1]))
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf4sij[p0:p1]) * 2

        # 2e AO integrals dot 2pdm
        ip0 = p0
        for b0, b1, nf in shell_prange(mol, shl0, shl1, blksize):
            eri1 = gto.moleintor.getints('cint2e_ip1_sph', mol._atm, mol._bas,
                                         mol._env, numpy.arange(b0,b1), comp=3,
                                         aosym='s2kl').reshape(3,nf,nao,-1)
            dm2buf = numpy.empty((nf,nao,nao_pair))
            for ic, (i0, i1) in enumerate(prange(0, nao_pair, ioblksize)):
                _load_block_tril(fdm2['dm2/%d'%ic], ip0, ip0+nf, dm2buf[:,:,i0:i1])
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2

            # Rows of the HF density matrix matching the current AO block;
            # sliced once here instead of once per einsum call.
            hf_dm1_blk = hf_dm1[ip0:ip0+nf]
            for i in range(3):
                #:tmp = _ccsd.unpack_tril(eri1[i].reshape(-1,nao_pair))
                #:vj = numpy.einsum('ijkl,kl->ij', tmp, hf_dm1[ip0:ip0+nf])
                #:vk = numpy.einsum('ijkl,jk->il', tmp, hf_dm1[ip0:ip0+nf])
                vj, vk = hf_get_jk_incore(eri1[i], hf_dm1)
                de[k,i] -=(numpy.einsum('ij,ij->', vj, hf_dm1_blk)
                         - numpy.einsum('ij,ij->', vk, hf_dm1_blk)*.5) * 2
            # Drop the large buffers before the next block is allocated.
            eri1 = dm2buf = hf_dm1_blk = None
            ip0 += nf
        log.debug('grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer('grad of atom %d'%ia, *time1)

    log.note('CCSD gradinets')
    log.note('==============')
    log.note('           x                y                z')
    for k, ia in enumerate(atmlst):
        log.note('%d %s  %15.9f  %15.9f  %15.9f', ia, mol.atom_symbol(ia),
                 de[k,0], de[k,1], de[k,2])
    log.timer('CCSD gradients', *time0)
    # Empty and close the AO-basis rdm2 file, then release the temp files.
    for key in fdm2.keys():
        del(fdm2[key])
    fdm2.close()
    _d2tmpfile = _dm2file = None
    return de