Exemple #1
0
def _call_rmb_vhf1(mol, dm, key='giao'):
    '''Contract the ``int2e_<key>_sa10sp1*`` spinor integrals with ``dm``.

    ``dm`` is cut into four ``n2c x n2c`` quadrants, where
    ``n2c = dm.shape[0] // 2``.  The quadrants are handed to the direct
    drivers in ``_vhf`` and the results are accumulated into the matching
    quadrants of the outputs.  Each of the three components is finally
    summed with its own conjugate transpose.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: 2-D matrix; both axes are split at ``n2c``.
        key: middle part of the integral name (``'int2e_' + key + ...``).

    Returns:
        Tuple ``(vj, vk)`` of complex arrays with shape
        ``(3, 2 * n2c, 2 * n2c)``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c, :n2c].copy()
    dmls = dm[:n2c, n2c:].copy()
    dmsl = dm[n2c:, :n2c].copy()
    dmss = dm[n2c:, n2c:].copy()
    # numpy.complex (an alias of the builtin complex) was removed in
    # NumPy 1.24; numpy.complex128 names the same dtype on every release.
    vj = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)
    vk = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)

    # First driver call: only the dmss quadrant is contracted, scaled by c1**4.
    vx = _vhf.rdirect_mapdm('int2e_' + key + '_sa10sp1spsp2_spinor', 's2kl',
                            ('ji->s2kl', 'lk->s1ij', 'jk->s1il', 'li->s1kj'),
                            dmss, 3, mol._atm, mol._bas, mol._env) * c1**4
    # Only the result of the 's2kl' script is passed through hermi_triu.
    for i in range(3):
        vx[0, i] = lib.hermi_triu(vx[0, i], 2)
    vj[:, n2c:, n2c:] = vx[0] + vx[1]
    vk[:, n2c:, n2c:] = vx[2] + vx[3]

    # Second driver call: one density quadrant per script, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_' + key + '_sa10sp1_spinor', 's2kl',
                            ('lk->s1ij', 'ji->s2kl', 'jk->s1il', 'li->s1kj'),
                            (dmll, dmss, dmsl, dmls), 3, mol._atm, mol._bas,
                            mol._env) * c1**2
    for i in range(3):
        vx[1, i] = lib.hermi_triu(vx[1, i], 2)
    vj[:, n2c:, n2c:] += vx[0]
    vj[:, :n2c, :n2c] += vx[1]
    vk[:, n2c:, :n2c] += vx[2]
    vk[:, :n2c, n2c:] += vx[3]
    for i in range(3):
        vj[i] = vj[i] + vj[i].T.conj()
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
    def test_unpack(self):
        '''Exercise lib.hermi_triu, lib.pack_tril and lib.unpack_tril.

        Covers anti-Hermitian input (hermi=2), Hermitian input (hermi=1),
        a stack of matrices, a fixed-seed fingerprint and empty inputs.
        '''
        n = 400
        mat = numpy.random.random((n, n))
        mat = mat + mat * .5j
        # Strip the imaginary part from the diagonal.
        diag = numpy.arange(n)
        mat[diag, diag] = mat[diag, diag].real
        anti = mat - mat.T.conj()
        pair = numpy.array((anti, anti))

        # hermi=2 on a transposed view must reproduce the input.
        filled = lib.hermi_triu(pair[0].T, hermi=2, inplace=0)
        self.assertAlmostEqual(abs(pair[0].T - filled).max(), 0, 12)

        filled = lib.hermi_triu(pair[1], hermi=2, inplace=0)
        self.assertAlmostEqual(abs(pair[1] - filled).max(), 0, 12)
        roundtrip = lib.unpack_tril(lib.pack_tril(filled), 2)
        self.assertAlmostEqual(abs(filled - roundtrip).max(), 0, 12)

        # hermi=1 must give a matrix equal to its conjugate transpose.
        filled = lib.hermi_triu(mat, hermi=1, inplace=0)
        self.assertAlmostEqual(abs(filled - filled.T.conj()).max(), 0, 12)

        stack = numpy.asarray((filled, filled, filled))
        roundtrip = lib.unpack_tril(lib.pack_tril(stack))
        self.assertAlmostEqual(abs(stack - roundtrip).max(), 0, 12)

        # Fixed-seed fingerprint of unpacking along axis 0.
        numpy.random.seed(1)
        packed = numpy.random.random((5050, 20))
        self.assertAlmostEqual(lib.fp(lib.unpack_tril(packed, axis=0)),
                               -103.03970592075423, 10)

        # Empty inputs keep an empty packed axis.
        empty = numpy.zeros((5, 0))
        self.assertEqual(lib.unpack_tril(empty, axis=-1).shape, (5, 0, 0))

        empty = numpy.zeros((0, 5))
        self.assertEqual(lib.unpack_tril(empty, axis=0).shape, (0, 0, 5))
Exemple #3
0
def make_h1_soc2e(hfcobj, dm0):
    '''Assemble ``v1`` from ``get_vxc_soc`` plus two-electron J/K terms.

    When the hybrid coefficient is non-negligible (``abs(hyb) > 1e-10``)
    both J and K come from ``uhf_hfc.get_jk`` and K is scaled by ``hyb``.
    Otherwise only J is built, from the ``int2e_p1vxp1`` integrals.  The
    component at index 1 of the result is negated before returning.

    Args:
        hfcobj: object providing ``_scf`` and ``verbose``.
        dm0: density matrices forwarded to ``get_vxc_soc`` and the J/K
            builders.

    Returns:
        The accumulated ``v1`` array.
    '''
    mf = hfcobj._scf
    # `mol` was read below without ever being bound in this function; take
    # it from the SCF object so the function does not depend on a global.
    mol = mf.mol
    ni = mf._numint
    hyb = ni.hybrid_coeff(mf.xc, spin=mol.spin)
    mem_now = lib.current_memory()[0]
    # Leave at least 2000 (same unit as mf.max_memory) for the grid work.
    max_memory = max(2000, mf.max_memory * .9 - mem_now)
    v1 = get_vxc_soc(ni,
                     mol,
                     mf.grids,
                     mf.xc,
                     dm0,
                     max_memory=max_memory,
                     verbose=hfcobj.verbose)
    if abs(hyb) > 1e-10:
        vj, vk = uhf_hfc.get_jk(mol, dm0)
        v1 += vj[0] + vj[1]
        v1 -= vk * hyb
    else:
        vj = _vhf.direct_mapdm(mol._add_suffix('int2e_p1vxp1'), 'a4ij',
                               'lk->s2ij', dm0, 3, mol._atm, mol._bas,
                               mol._env)
        # Complete each of the 2 x 3 matrices in place with hermi=2.
        for i in range(3):
            lib.hermi_triu(vj[0, i], hermi=2, inplace=True)
            lib.hermi_triu(vj[1, i], hermi=2, inplace=True)
        v1 += vj[0] + vj[1]
    v1[1] *= -1
    return v1
Exemple #4
0
def get_j(mol, dm0):
    '''Contract the ``int2e_p1vxp1`` integrals with ``dm0`` ('lk->s2ij').

    The six matrices ``vj[s, c]`` (``s`` in 0..1, ``c`` in 0..2) are
    completed in place by ``lib.hermi_triu`` with ``hermi=2``.
    '''
    intor_name = mol._add_suffix('int2e_p1vxp1')
    vj = _vhf.direct_mapdm(intor_name, 'a4ij', 'lk->s2ij',
                           dm0, 3, mol._atm, mol._bas, mol._env)
    for comp in range(3):
        for block in (0, 1):
            lib.hermi_triu(vj[block, comp], hermi=2, inplace=True)
    return vj
Exemple #5
0
def _call_rmb_vhf1(mol, dm, key='giao'):
    '''Contract the ``int2e_<key>_sa10sp1*`` spinor integrals with ``dm``.

    ``dm`` is cut into four ``n2c x n2c`` quadrants, where
    ``n2c = dm.shape[0] // 2``.  The quadrants are handed to the direct
    drivers in ``_vhf`` and the results are accumulated into the matching
    quadrants of the outputs.  Each of the three components is finally
    summed with its own conjugate transpose.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: 2-D matrix; both axes are split at ``n2c``.
        key: middle part of the integral name (``'int2e_' + key + ...``).

    Returns:
        Tuple ``(vj, vk)`` of complex arrays with shape
        ``(3, 2 * n2c, 2 * n2c)``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c,:n2c].copy()
    dmls = dm[:n2c,n2c:].copy()
    dmsl = dm[n2c:,:n2c].copy()
    dmss = dm[n2c:,n2c:].copy()
    # numpy.complex (an alias of the builtin complex) was removed in
    # NumPy 1.24; numpy.complex128 names the same dtype on every release.
    vj = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)
    vk = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)

    # First driver call: only the dmss quadrant is contracted, scaled by c1**4.
    vx = _vhf.rdirect_mapdm('int2e_'+key+'_sa10sp1spsp2_spinor', 's2kl',
                            ('ji->s2kl', 'lk->s1ij', 'jk->s1il', 'li->s1kj'),
                            dmss, 3, mol._atm, mol._bas, mol._env) * c1**4
    # Only the result of the 's2kl' script is passed through hermi_triu.
    for i in range(3):
        vx[0,i] = lib.hermi_triu(vx[0,i], 2)
    vj[:,n2c:,n2c:] = vx[0] + vx[1]
    vk[:,n2c:,n2c:] = vx[2] + vx[3]

    # Second driver call: one density quadrant per script, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_'+key+'_sa10sp1_spinor', 's2kl',
                            ('lk->s1ij', 'ji->s2kl', 'jk->s1il', 'li->s1kj'),
                            (dmll,dmss,dmsl,dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    for i in range(3):
        vx[1,i] = lib.hermi_triu(vx[1,i], 2)
    vj[:,n2c:,n2c:] += vx[0]
    vj[:,:n2c,:n2c] += vx[1]
    vk[:,n2c:,:n2c] += vx[2]
    vk[:,:n2c,n2c:] += vx[3]
    for i in range(3):
        vj[i] = vj[i] + vj[i].T.conj()
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
Exemple #6
0
def get_jk(mol_or_mf=None,
           dm=None,
           hermi=1,
           with_j=True,
           with_k=True,
           omega=None):
    '''MPI version of scf.hf.get_jk function

    ``mol_or_mf`` may be a ``gto.mole.Mole`` (wrapped into an SCF view) or
    an SCF-like object used as is.  When ``omega`` is given, the J/K
    evaluation runs inside ``mf.mol.with_range_coulomb(omega)``.
    ``with_j`` and ``with_k`` are accepted but not read in this function.

    Returns:
        ``(vj, vk)``, each reshaped to ``dm.shape``.
    '''
    mf = (hf.SCF(mol_or_mf).view(SCF)
          if isinstance(mol_or_mf, gto.mole.Mole) else mol_or_mf)

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    dm_skipped = comm.allgather(dm is mpi.Message.SkippedArg)
    if any(dm_skipped):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks before evaluating.
    mf.unpack_(comm.bcast(mf.pack()))
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()

    if omega is not None:
        with mf.mol.with_range_coulomb(omega):
            vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)
    else:
        vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)

    # Only rank 0 completes the J matrices in place.
    if rank == 0:
        for vj_i in vj:
            lib.hermi_triu(vj_i, 1, inplace=True)
    return vj.reshape(dm.shape), vk.reshape(dm.shape)
Exemple #7
0
def _mat_hermi_(vk, hermi):
    '''Apply ``lib.hermi_triu`` to ``vk`` when ``hermi == 1``.

    A 2-D ``vk`` is processed as one matrix; otherwise every ``vk[i]``
    along the first axis is processed and written back.  For any other
    ``hermi`` value ``vk`` is returned untouched.
    '''
    if hermi != 1:
        return vk
    if vk.ndim == 2:
        return lib.hermi_triu(vk, hermi)
    for idx in range(vk.shape[0]):
        vk[idx] = lib.hermi_triu(vk[idx], hermi)
    return vk
Exemple #8
0
def get_j(mol, dm0):
    '''Contract the ``int2e_p1vxp1`` integrals with ``dm0`` ('lk->s2ij').

    The six matrices ``vj[s, c]`` (``s`` in 0..1, ``c`` in 0..2) are
    completed in place by ``lib.hermi_triu`` with ``hermi=2``.
    '''
    intor_name = mol._add_suffix('int2e_p1vxp1')
    vj = _vhf.direct_mapdm(intor_name, 'a4ij', 'lk->s2ij',
                           dm0, 3, mol._atm, mol._bas, mol._env)
    for comp in range(3):
        for block in (0, 1):
            lib.hermi_triu(vj[block, comp], hermi=2, inplace=True)
    return vj
Exemple #9
0
def get_jk(mol, dm0):
    '''Build J and K from the ``int2e_p1vxp1`` integrals and ``dm0``.

    K_{pq} = (pi|iq) + (iq|pi): the two exchange contractions
    ('jk->s1il' and 'li->s1kj') are summed in place into the first one.
    The J matrices ``vj[s, c]`` are completed in place with ``hermi=2``.
    '''
    scripts = ('lk->s2ij', 'jk->s1il', 'li->s1kj')
    vj, vk, vk_swapped = _vhf.direct_mapdm(mol._add_suffix('int2e_p1vxp1'),
                                           'a4ij', scripts,
                                           dm0, 3, mol._atm, mol._bas,
                                           mol._env)
    for comp in range(3):
        for block in (0, 1):
            lib.hermi_triu(vj[block, comp], hermi=2, inplace=True)
    vk += vk_swapped
    return vj, vk
Exemple #10
0
def get_jk(mol, dm0):
    '''Build J and K from the ``int2e_p1vxp1`` integrals and ``dm0``.

    K_{pq} = (pi|iq) + (iq|pi): the two exchange contractions
    ('jk->s1il' and 'li->s1kj') are summed in place into the first one.
    The J matrices ``vj[s, c]`` are completed in place with ``hermi=2``.
    '''
    scripts = ('lk->s2ij', 'jk->s1il', 'li->s1kj')
    vj, vk, vk_swapped = _vhf.direct_mapdm(mol._add_suffix('int2e_p1vxp1'),
                                           'a4ij', scripts,
                                           dm0, 3, mol._atm, mol._bas,
                                           mol._env)
    for comp in range(3):
        for block in (0, 1):
            lib.hermi_triu(vj[block, comp], hermi=2, inplace=True)
    vk += vk_swapped
    return vj, vk
Exemple #11
0
def make_h1(mf,
            mo_coeff,
            mo_occ,
            chkfile=None,
            atmlst=None,
            verbose=logger.WARN):
    '''Build, for every atom in ``atmlst``, the matrix ``h1ao + vhf``.

    ``h1ao`` collects the ``int1e_ipkin``/``int1e_ipnuc``/``int1e_iprinv``
    contributions; ``vhf`` collects J - K/2 from the ``int2e_ip1``
    integrals.  Both are summed with their (0, 2, 1) transposes.

    Args:
        mf: object providing ``mol``, ``stdout`` and ``verbose``.
        mo_coeff: 2-D orbital coefficient matrix.
        mo_occ: occupations; columns with ``mo_occ > 0`` form the density.
        chkfile: when given, each result is saved under
            ``'scf_h1ao/<ia>'`` instead of being collected.
        atmlst: atom indices; defaults to ``range(mol.natm)``.
        verbose: a ``logger.Logger`` or a verbosity level.

    Returns:
        The list of per-atom matrices when ``chkfile`` is None, otherwise
        ``chkfile`` itself.
    '''
    # The logger is set up as in the original; it is not referenced below.
    log = (verbose if isinstance(verbose, logger.Logger)
           else logger.Logger(mf.stdout, mf.verbose))
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    occ_orbs = mo_coeff[:, mo_occ > 0]
    dm0 = numpy.dot(occ_orbs, occ_orbs.T) * 2

    kin_grad = mol.intor('int1e_ipkin', comp=3)
    nuc_grad = mol.intor('int1e_ipnuc', comp=3)
    h1a = -(kin_grad + nuc_grad)

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    int2e_ip1 = mol._add_suffix('int2e_ip1')
    scripts = ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il')
    for ia in atmlst:
        shl0, shl1, p0, p1 = offsetdic[ia]

        # One-electron part: 1/r operator centred on atom ia.
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        # Two-electron part: first shell index restricted to atom ia.
        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        dms = (-dm0[:, p0:p1], -dm0, -dm0[:, p0:p1], -dm0)
        vj1, vj2, vk1, vk2 = _vhf.direct_bindm(int2e_ip1, 's2kl',
                                               scripts, dms,
                                               3, mol._atm, mol._bas,
                                               mol._env,
                                               shls_slice=shls_slice)
        for comp in range(3):
            lib.hermi_triu(vj1[comp], 1)
        vhf = vj1 - vk1 * .5
        vhf[:, p0:p1] += vj2 - vk2 * .5
        vhf = vhf + vhf.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + vhf)
        else:
            lib.chkfile.save(chkfile, 'scf_h1ao/%d' % ia, h1ao + vhf)

    return h1aos if chkfile is None else chkfile
Exemple #12
0
def dot_eri_dm(eri, dms, nao_v=None, eri_dot_dm=True):
    '''Contract a 4-fold-symmetry ``eri`` with one or more density matrices.

    Args:
        eri: 2-D double array holding ``npair_v * npair_dm`` elements.
        dms: one matrix or a stack of matrices; the last axis gives
            ``nao_dm``.
        nao_v: dimension of the output matrices; defaults to ``nao_dm``.
        eri_dot_dm: True selects the 'ijkl,kl->ij' kernels, False the
            'ijkl,ij->kl' kernels.

    Returns:
        An ``(nao_v, nao_v)`` array for a single density matrix, otherwise
        an ``(n_dm, nao_v, nao_v)`` array.

    Raises:
        RuntimeError: if the size of ``eri`` does not match ``nao_v`` and
            the density-matrix dimension.
    '''
    assert (eri.dtype == numpy.double)
    eri = numpy.asarray(eri, order='C')
    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao_dm = dms_shape[-1]
    if nao_v is None:
        nao_v = nao_dm

    dms = dms.reshape(-1, nao_dm, nao_dm)
    n_dm = dms.shape[0]
    vj = numpy.zeros((n_dm, nao_v, nao_v))

    npair_v = nao_v * (nao_v + 1) // 2
    npair_dm = nao_dm * (nao_dm + 1) // 2
    # Only the 4-fold symmetry layout is supported.
    if eri.ndim != 2 or npair_v * npair_dm != eri.size:
        raise RuntimeError(
            'Array shape not consistent: nao_v %s, DM %s, eri %s' %
            (nao_v, dms_shape, eri.shape))

    if eri_dot_dm:  # 'ijkl,kl->ij'
        drv_name = 'CVHFnrs4_incore_drv_diff_size_v_dm'
        kernel_name = 'CVHFics4_kl_s2ij_diff_size'
    else:  # 'ijkl,ij->kl'
        drv_name = 'CVHFnrs4_incore_drv_diff_size_dm_v'
        kernel_name = 'CVHFics4_ij_s2kl_diff_size'
    fdrv = getattr(_vhf.libcvhf, drv_name)
    fvj = _vhf._fpointer(kernel_name)

    # One (density, output, kernel) triple per density matrix.
    dm_ptrs = [dm.ctypes.data_as(ctypes.c_void_p) for dm in dms]
    out_ptrs = [out.ctypes.data_as(ctypes.c_void_p) for out in vj]
    kernels = [fvj] * n_dm

    n_ops = len(dm_ptrs)
    ptr_array = ctypes.c_void_p * n_ops
    fdrv(eri.ctypes.data_as(ctypes.c_void_p),
         ptr_array(*dm_ptrs),
         ptr_array(*out_ptrs), ctypes.c_int(n_ops),
         ctypes.c_int(nao_v), ctypes.c_int(nao_dm),
         ptr_array(*kernels))

    for out in vj:
        lib.hermi_triu(out, 1, inplace=True)
    out_shape = (nao_v, nao_v) if n_dm == 1 else (n_dm, nao_v, nao_v)
    return vj.reshape(out_shape)
Exemple #13
0
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    '''Build, for every atom in ``atmlst``, the matrix ``h1ao + vhf``.

    ``h1ao`` collects the ``cint1e_ipkin_sph``/``cint1e_ipnuc_sph``/
    ``cint1e_iprinv_sph`` contributions; ``vhf`` collects J - K/2 from the
    ``cint2e_ip1_sph`` integrals.  Both are summed with their (0, 2, 1)
    transposes.

    Args:
        mf: object providing ``mol``, ``stdout`` and ``verbose``.
        mo_coeff: 2-D orbital coefficient matrix.
        mo_occ: occupations; columns with ``mo_occ > 0`` form the density.
        chkfile: when given, each result is saved under
            ``'scf_h1ao/<ia>'`` instead of being collected.
        atmlst: atom indices; defaults to ``range(mol.natm)``.
        verbose: a ``logger.Logger`` or a verbosity level.

    Returns:
        The list of per-atom matrices when ``chkfile`` is None, otherwise
        ``chkfile`` itself.
    '''
    # The logger is set up as in the original; it is not referenced below.
    log = (verbose if isinstance(verbose, logger.Logger)
           else logger.Logger(mf.stdout, mf.verbose))
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    occ_orbs = mo_coeff[:,mo_occ>0]
    dm0 = numpy.dot(occ_orbs, occ_orbs.T) * 2

    kin_grad = mol.intor('cint1e_ipkin_sph', comp=3)
    nuc_grad = mol.intor('cint1e_ipnuc_sph', comp=3)
    h1a = -(kin_grad + nuc_grad)

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    scripts = ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il')
    for ia in atmlst:
        shl0, shl1, p0, p1 = offsetdic[ia]

        # One-electron part: 1/r operator centred on atom ia.
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        h1ao = h1ao + h1ao.transpose(0,2,1)

        # Two-electron part: first shell index restricted to atom ia.
        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        dms = (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0)
        vj1, vj2, vk1, vk2 = _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                               scripts, dms,
                                               3, mol._atm, mol._bas,
                                               mol._env,
                                               shls_slice=shls_slice)
        for comp in range(3):
            lib.hermi_triu(vj1[comp], 1)
        vhf = vj1 - vk1*.5
        vhf[:,p0:p1] += vj2 - vk2*.5
        vhf = vhf + vhf.transpose(0,2,1)

        if chkfile is None:
            h1aos.append(h1ao+vhf)
        else:
            lib.chkfile.save(chkfile, 'scf_h1ao/%d' % ia, h1ao+vhf)

    return h1aos if chkfile is None else chkfile
Exemple #14
0
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Picks up to ``np`` entries of ``hdiag`` with the smallest values and
    builds the Hamiltonian block over them.

    Returns:
        ``(addr, h0)``: the selected addresses and the matrix over them.

    Raises:
        NotImplementedError: if ``norb > 63``.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # numpy without argpartition: fall back to a full sort.
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    # Split each combined address into its alpha and beta string address.
    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    ndet = len(addr)
    h0 = numpy.zeros((ndet, ndet))
    as_ptr = ctypes.c_void_p
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(as_ptr),
                            h1e.ctypes.data_as(as_ptr),
                            eri.ctypes.data_as(as_ptr),
                            stra.ctypes.data_as(as_ptr),
                            strb.ctypes.data_as(as_ptr),
                            ctypes.c_int(norb), ctypes.c_int(ndet))

    # Overwrite the diagonal with the supplied hdiag values.
    diag = numpy.arange(ndet)
    h0[diag, diag] = hdiag[addr]
    h0 = lib.hermi_triu(h0)
    return addr, h0
Exemple #15
0
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Picks up to ``np`` entries of ``hdiag`` with the smallest values and
    builds the Hamiltonian block over them.

    Returns:
        ``(addr, h0)``: the selected addresses and the matrix over them.
    '''
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # numpy without argpartition: fall back to a full sort.
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    # Split each combined address into its alpha and beta string address.
    addra, addrb = divmod(addr, nb)
    alpha_strs = [cistring.addr2str(norb, neleca, ia) for ia in addra]
    stra = numpy.array(alpha_strs, dtype=numpy.uint64)
    beta_strs = [cistring.addr2str(norb, nelecb, ib) for ib in addrb]
    strb = numpy.array(beta_strs, dtype=numpy.uint64)

    ndet = len(addr)
    h0 = numpy.zeros((ndet, ndet))
    as_ptr = ctypes.c_void_p
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(as_ptr),
                            h1e.ctypes.data_as(as_ptr),
                            eri.ctypes.data_as(as_ptr),
                            stra.ctypes.data_as(as_ptr),
                            strb.ctypes.data_as(as_ptr),
                            ctypes.c_int(norb), ctypes.c_int(ndet))

    # Overwrite the diagonal with the supplied hdiag values.
    diag = numpy.arange(ndet)
    h0[diag, diag] = hdiag[addr]
    h0 = lib.hermi_triu(h0)
    return addr, h0
Exemple #16
0
def _call_giao_vhf1(mol, dm):
    '''Contract the ``int2e_g1_spinor`` family of integrals with ``dm``.

    ``dm`` is cut into four ``n2c x n2c`` quadrants, where
    ``n2c = dm.shape[0] // 2``.  Four driver calls fill the quadrants of
    the outputs.  At the end every ``vj[i]`` goes through
    ``lib.hermi_triu(..., 1)`` and every ``vk[i]`` is summed with its
    conjugate transpose.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: 2-D matrix; both axes are split at ``n2c``.

    Returns:
        Tuple ``(vj, vk)`` of complex arrays with shape
        ``(3, 2 * n2c, 2 * n2c)``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c, :n2c].copy()
    dmls = dm[:n2c, n2c:].copy()
    dmsl = dm[n2c:, :n2c].copy()
    dmss = dm[n2c:, n2c:].copy()
    # numpy.complex (an alias of the builtin complex) was removed in
    # NumPy 1.24; numpy.complex128 names the same dtype on every release.
    vj = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)
    vk = numpy.zeros((3, n2c * 2, n2c * 2), dtype=numpy.complex128)
    # dmll only, unscaled -> upper-left quadrant.
    vx = _vhf.rdirect_mapdm('int2e_g1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmll, 3, mol._atm,
                            mol._bas, mol._env)
    vj[:, :n2c, :n2c] = vx[0]
    vk[:, :n2c, :n2c] = vx[1]
    # dmss only, scaled by c1**4 -> lower-right quadrant.
    vx = _vhf.rdirect_mapdm('int2e_spgsp1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmss, 3, mol._atm,
                            mol._bas, mol._env) * c1**4
    vj[:, n2c:, n2c:] = vx[0]
    vk[:, n2c:, n2c:] = vx[1]
    # Mixed terms, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_g1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmss, dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:, :n2c, :n2c] += vx[0]
    vk[:, :n2c, n2c:] += vx[1]
    vx = _vhf.rdirect_bindm('int2e_spgsp1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmll, dmsl), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:, n2c:, n2c:] += vx[0]
    vk[:, n2c:, :n2c] += vx[1]
    for i in range(3):
        vj[i] = lib.hermi_triu(vj[i], 1)
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
Exemple #17
0
def _call_giao_vhf1(mol, dm):
    '''Contract the ``int2e_g1_spinor`` family of integrals with ``dm``.

    ``dm`` is cut into four ``n2c x n2c`` quadrants, where
    ``n2c = dm.shape[0] // 2``.  Four driver calls fill the quadrants of
    the outputs.  At the end every ``vj[i]`` goes through
    ``lib.hermi_triu(..., 1)`` and every ``vk[i]`` is summed with its
    conjugate transpose.

    Args:
        mol: object providing the ``_atm``, ``_bas`` and ``_env`` arrays.
        dm: 2-D matrix; both axes are split at ``n2c``.

    Returns:
        Tuple ``(vj, vk)`` of complex arrays with shape
        ``(3, 2 * n2c, 2 * n2c)``.
    '''
    c1 = .5 / lib.param.LIGHT_SPEED
    n2c = dm.shape[0] // 2
    dmll = dm[:n2c,:n2c].copy()
    dmls = dm[:n2c,n2c:].copy()
    dmsl = dm[n2c:,:n2c].copy()
    dmss = dm[n2c:,n2c:].copy()
    # numpy.complex (an alias of the builtin complex) was removed in
    # NumPy 1.24; numpy.complex128 names the same dtype on every release.
    vj = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)
    vk = numpy.zeros((3,n2c*2,n2c*2), dtype=numpy.complex128)
    # dmll only, unscaled -> upper-left quadrant.
    vx = _vhf.rdirect_mapdm('int2e_g1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmll, 3,
                            mol._atm, mol._bas, mol._env)
    vj[:,:n2c,:n2c] = vx[0]
    vk[:,:n2c,:n2c] = vx[1]
    # dmss only, scaled by c1**4 -> lower-right quadrant.
    vx = _vhf.rdirect_mapdm('int2e_spgsp1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), dmss, 3,
                            mol._atm, mol._bas, mol._env) * c1**4
    vj[:,n2c:,n2c:] = vx[0]
    vk[:,n2c:,n2c:] = vx[1]
    # Mixed terms, scaled by c1**2.
    vx = _vhf.rdirect_bindm('int2e_g1spsp2_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmss,dmls), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:,:n2c,:n2c] += vx[0]
    vk[:,:n2c,n2c:] += vx[1]
    vx = _vhf.rdirect_bindm('int2e_spgsp1_spinor', 'a4ij',
                            ('lk->s2ij', 'jk->s1il'), (dmll,dmsl), 3,
                            mol._atm, mol._bas, mol._env) * c1**2
    vj[:,n2c:,n2c:] += vx[0]
    vk[:,n2c:,:n2c] += vx[1]
    for i in range(3):
        vj[i] = lib.hermi_triu(vj[i], 1)
        vk[i] = vk[i] + vk[i].T.conj()
    return vj, vk
Exemple #18
0
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''Build the pspace Hamiltonian block from separate alpha/beta integrals.

    Args:
        h1e: pair ``(h1e_a, h1e_b)`` of one-electron matrices.
        eri: triple ``(aa, ab, bb)`` of two-electron integrals, each passed
            through ``ao2mo.restore(1, ..., norb)``.
        norb: number of orbitals.
        nelec: electron count, unpacked by ``direct_spin1._unpack_nelec``.
        hdiag: diagonal of the Hamiltonian; the ``np`` smallest entries
            are selected.
        np: maximum size of the returned block.

    Returns:
        ``(addr, h0)``: the selected addresses and the matrix over them.
    '''
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    h1e_a = numpy.ascontiguousarray(h1e[0])
    h1e_b = numpy.ascontiguousarray(h1e[1])
    g2e_aa = ao2mo.restore(1, eri[0], norb)
    g2e_ab = ao2mo.restore(1, eri[1], norb)
    g2e_bb = ao2mo.restore(1, eri[2], norb)
    # Only the beta link table is needed (for the number of beta strings);
    # the alpha table was built but never used, so it is no longer computed.
    link_indexb = cistring.gen_linkstr_index_trilidx(range(norb), nelecb)
    nb = link_indexb.shape[0]
    addr = numpy.argsort(hdiag)[:np]
    addra = addr // nb
    addrb = addr % nb
    # numpy.long is not available in every NumPy release (the alias was
    # removed in 1.24); int64 is a fixed 64-bit integer on every platform.
    stra = numpy.array([cistring.addr2str(norb,neleca,ia) for ia in addra],
                       dtype=numpy.int64)
    strb = numpy.array([cistring.addr2str(norb,nelecb,ib) for ib in addrb],
                       dtype=numpy.int64)
    np = len(addr)
    h0 = numpy.zeros((np,np))
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # Overwrite the diagonal with the supplied hdiag values.
    idx = numpy.arange(np)
    h0[idx,idx] = hdiag[addr]
    h0 = lib.hermi_triu(h0)
    return addr, h0
Exemple #19
0
def incore(eri, dm, hermi=0):
    '''Build ``vj`` and ``vk`` from in-memory ``eri`` and one matrix ``dm``.

    The storage layout of ``eri`` is recognised from its rank and size:
    a 2-D array of ``npair * npair`` elements (4-fold symmetry) or a 1-D
    array of ``npair * (npair + 1) // 2`` elements (8-fold symmetry).

    Args:
        eri: real integral array in one of the two layouts above.
        dm: square matrix; ``dm.shape[0]`` gives ``nao``.
        hermi: when non-zero, both outputs go through ``lib.hermi_triu``
            with this value; when 0 only ``vj`` does (with 1).

    Returns:
        Tuple ``(vj, vk)`` of ``(nao, nao)`` arrays.

    Raises:
        RuntimeError: if ``eri`` matches neither layout.
    '''
    assert(not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao,nao))
    vk = numpy.empty((nao,nao))
    npair = nao*(nao+1)//2
    four_fold = eri.ndim == 2 and npair*npair == eri.size
    eight_fold = eri.ndim == 1 and npair*(npair+1)//2 == eri.size

    if four_fold:
        fdrv = libcvhf.CVHFnrs4_incore_drv
        # 'ijkl,kl->ij' for J and 'ijkl,il->jk' for K.  The pair
        # CVHFics4_ij_s2kl ('ijkl,ij->kl') / CVHFics4_jk_s1il
        # ('ijkl,jk->il') is the alternative noted by the original author.
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif eight_fold:
        fdrv = libcvhf.CVHFnrs8_incore_drv
        fvj = _fpointer('CVHFics8_tridm_vj')
        fvk = _fpointer('CVHFics8_jk_s2il' if hermi == 1
                        else 'CVHFics8_jk_s1il')
        # Packed dm + dm.T with the diagonal entries halved.
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        rows = numpy.arange(nao)
        tridm[rows*(rows+1)//2+rows] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s'
                           % (dm.shape, eri.shape))

    as_ptr = ctypes.c_void_p
    fdrv(eri.ctypes.data_as(as_ptr),
         tridm.ctypes.data_as(as_ptr),
         vj.ctypes.data_as(as_ptr),
         dm.ctypes.data_as(as_ptr),
         vk.ctypes.data_as(as_ptr),
         ctypes.c_int(nao), fvj, fvk)

    # vj is always completed; vk only when a symmetry was requested.
    vj = lib.hermi_triu(vj, hermi if hermi != 0 else 1)
    if hermi != 0:
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
Exemple #20
0
def incore(eri, dm, hermi=0):
    '''Build ``vj`` and ``vk`` from in-memory ``eri`` and one matrix ``dm``.

    The storage layout of ``eri`` is recognised from its rank and size:
    a 2-D array of ``npair * npair`` elements (4-fold symmetry) or a 1-D
    array of ``npair * (npair + 1) // 2`` elements (8-fold symmetry).

    Args:
        eri: real integral array in one of the two layouts above.
        dm: square matrix; ``dm.shape[0]`` gives ``nao``.
        hermi: when non-zero, both outputs go through ``lib.hermi_triu``
            with this value; when 0 only ``vj`` does (with 1).

    Returns:
        Tuple ``(vj, vk)`` of ``(nao, nao)`` arrays.

    Raises:
        RuntimeError: if ``eri`` matches neither layout.
    '''
    assert (not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao, nao))
    vk = numpy.empty((nao, nao))
    npair = nao * (nao + 1) // 2
    four_fold = eri.ndim == 2 and npair * npair == eri.size
    eight_fold = eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size

    if four_fold:
        fdrv = libcvhf.CVHFnrs4_incore_drv
        # 'ijkl,kl->ij' for J and 'ijkl,il->jk' for K.  The pair
        # CVHFics4_ij_s2kl ('ijkl,ij->kl') / CVHFics4_jk_s1il
        # ('ijkl,jk->il') is the alternative noted by the original author.
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    elif eight_fold:
        fdrv = libcvhf.CVHFnrs8_incore_drv
        fvj = _fpointer('CVHFics8_tridm_vj')
        fvk = _fpointer('CVHFics8_jk_s2il' if hermi == 1
                        else 'CVHFics8_jk_s1il')
        # Packed dm + dm.T with the diagonal entries halved.
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        rows = numpy.arange(nao)
        tridm[rows * (rows + 1) // 2 + rows] *= .5
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dm.shape, eri.shape))

    as_ptr = ctypes.c_void_p
    fdrv(eri.ctypes.data_as(as_ptr),
         tridm.ctypes.data_as(as_ptr),
         vj.ctypes.data_as(as_ptr),
         dm.ctypes.data_as(as_ptr),
         vk.ctypes.data_as(as_ptr), ctypes.c_int(nao), fvj, fvk)

    # vj is always completed; vk only when a symmetry was requested.
    vj = lib.hermi_triu(vj, hermi if hermi != 0 else 1)
    if hermi != 0:
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
Exemple #21
0
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Picks up to ``np`` entries of ``hdiag`` with the smallest values and
    builds the Hamiltonian block over them.  When ``h1e`` and ``eri`` pass
    the symmetry check the lower triangle is mirrored with
    ``lib.hermi_triu``; otherwise the upper triangle is computed by a
    second call on the transposed integrals.

    Args:
        h1e: real one-electron matrix.
        eri: real two-electron integrals, restored to 4-index form.
        norb: number of orbitals (at most 63).
        nelec: electron count, unpacked by ``_unpack_nelec``.
        hdiag: Hamiltonian diagonal; computed with ``make_hdiag`` if None.
        np: maximum size of the returned block.

    Returns:
        ``(addr, h0)``: the selected addresses and the matrix over them.

    Raises:
        NotImplementedError: if ``norb > 63`` or an input is complex.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    # numpy.complex was removed in NumPy 1.24, so comparing a dtype with
    # it raised AttributeError; iscomplexobj also covers every complex
    # precision rather than complex128 only.
    if numpy.iscomplexobj(h1e) or numpy.iscomplexobj(eri):
        raise NotImplementedError('Complex Hamiltonian')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            addr = numpy.argpartition(hdiag, np - 1)[:np].copy()
        except AttributeError:
            # numpy without argpartition: fall back to a full sort.
            addr = numpy.argsort(hdiag)[:np].copy()
    # Split each combined address into its alpha and beta string address.
    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    np = len(addr)
    h0 = numpy.zeros((np, np))
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                            h1e.ctypes.data_as(ctypes.c_void_p),
                            eri.ctypes.data_as(ctypes.c_void_p),
                            stra.ctypes.data_as(ctypes.c_void_p),
                            strb.ctypes.data_as(ctypes.c_void_p),
                            ctypes.c_int(norb), ctypes.c_int(np))

    HERMITIAN_THRESHOLD = 1e-10
    if (abs(h1e - h1e.T).max() < HERMITIAN_THRESHOLD and
            abs(eri - eri.transpose(1, 0, 3, 2)).max() < HERMITIAN_THRESHOLD):
        # symmetric Hamiltonian
        h0 = lib.hermi_triu(h0)
    else:
        # Fill the upper triangular part: with h0 in Fortran order, the
        # same routine run on the transposed integrals writes the other
        # triangle.
        h0 = numpy.asarray(h0, order='F')
        h1e = numpy.asarray(h1e.T, order='C')
        eri = numpy.asarray(eri.transpose(1, 0, 3, 2), order='C')
        libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e.ctypes.data_as(ctypes.c_void_p),
                                eri.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # Overwrite the diagonal with the hdiag values.
    idx = numpy.arange(np)
    h0[idx, idx] = hdiag[addr]
    return addr, h0
Exemple #22
0
def get_jk(mol_or_mf, dm, hermi=1):
    '''MPI version of scf.hf.get_jk function

    ``mol_or_mf`` may be a ``gto.mole.Mole`` (wrapped into an SCF view) or
    an SCF-like object used as is.

    Returns:
        ``(vj, vk)``, each reshaped to ``dm.shape``.
    '''
    mf = (hf.SCF(mol_or_mf).view(SCF)
          if isinstance(mol_or_mf, gto.mole.Mole) else mol_or_mf)

    # dm may be too big for mpi4py library to serialize. Broadcast dm here.
    dm_is_placeholder = isinstance(dm, str) and dm == 'SKIPPED_ARG'
    if any(comm.allgather(dm_is_placeholder)):
        dm = mpi.bcast_tagged_array(dm)

    # Synchronise the mean-field state across ranks before evaluating.
    mf.unpack_(comm.bcast(mf.pack()))
    if mf.opt is None:
        mf.opt = mf.init_direct_scf()
    vj, vk = _eval_jk(mf, dm, hermi, _jk_jobs_s8)

    # Only rank 0 completes the J matrices in place.
    if rank == 0:
        for vj_i in vj:
            lib.hermi_triu(vj_i, 1, inplace=True)
    return vj.reshape(dm.shape), vk.reshape(dm.shape)
Exemple #23
0
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463

    Picks up to ``np`` entries of ``hdiag`` with the smallest values and
    builds the Hamiltonian block over them.  When ``h1e`` and ``eri`` pass
    the symmetry check the lower triangle is mirrored with
    ``lib.hermi_triu``; otherwise the upper triangle is computed by a
    second call on the transposed integrals.

    Returns:
        ``(addr, h0)``: the selected addresses and the matrix over them.

    Raises:
        NotImplementedError: if ``norb > 63``.
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)

    if hdiag.size >= np:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            # numpy without argpartition: fall back to a full sort.
            addr = numpy.argsort(hdiag)[:np]
    else:
        addr = numpy.arange(hdiag.size)

    # Split each combined address into its alpha and beta string address.
    addra, addrb = divmod(addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    ndet = len(addr)
    as_ptr = ctypes.c_void_p
    h0 = numpy.zeros((ndet, ndet))
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(as_ptr),
                            h1e.ctypes.data_as(as_ptr),
                            eri.ctypes.data_as(as_ptr),
                            stra.ctypes.data_as(as_ptr),
                            strb.ctypes.data_as(as_ptr),
                            ctypes.c_int(norb), ctypes.c_int(ndet))

    HERMITIAN_THRESHOLD = 1e-10
    symmetric = (abs(h1e - h1e.T).max() < HERMITIAN_THRESHOLD and
                 abs(eri - eri.transpose(1,0,3,2)).max() < HERMITIAN_THRESHOLD)
    if symmetric:
        h0 = lib.hermi_triu(h0)
    else:
        # Fill the upper triangular part: with h0 in Fortran order, the
        # same routine run on the transposed integrals writes the other
        # triangle.
        h0 = numpy.asarray(h0, order='F')
        h1e_t = numpy.asarray(h1e.T, order='C')
        eri_t = numpy.asarray(eri.transpose(1,0,3,2), order='C')
        libfci.FCIpspace_h0tril(h0.ctypes.data_as(as_ptr),
                                h1e_t.ctypes.data_as(as_ptr),
                                eri_t.ctypes.data_as(as_ptr),
                                stra.ctypes.data_as(as_ptr),
                                strb.ctypes.data_as(as_ptr),
                                ctypes.c_int(norb), ctypes.c_int(ndet))

    # Overwrite the diagonal with the hdiag values.
    diag = numpy.arange(ndet)
    h0[diag,diag] = hdiag[addr]
    return addr, h0
Exemple #24
0
def _get_jk(mol,
            intor,
            comp,
            aosym,
            script_dms,
            shls_slice=None,
            cintopt=None,
            vhfopt=None):
    '''Run ``_vhf.direct_bindm`` for interleaved (script, dm) pairs.

    ``script_dms`` alternates contraction scripts (even positions) and
    density matrices (odd positions).  Results whose script contains
    's2' (hermi=1) or 'a2' (hermi=2) and whose last two axes are equal
    are completed in place with ``lib.hermi_triu``.

    Returns:
        The results of ``_vhf.direct_bindm``, one per script.
    '''
    intor = mol._add_suffix(intor)
    scripts = script_dms[::2]
    dms = script_dms[1::2]
    vs = _vhf.direct_bindm(intor, aosym, scripts, dms, comp,
                           mol._atm, mol._bas, mol._env,
                           vhfopt=vhfopt,
                           cintopt=cintopt,
                           shls_slice=shls_slice)
    for k, script in enumerate(scripts):
        # Scripts with neither marker are left as they are.
        hermi = 1 if 's2' in script else 2 if 'a2' in script else None
        if hermi is None:
            continue

        v = vs[k]
        shape = v.shape
        if shape[-2] != shape[-1]:
            continue
        matrices = [v[i] for i in range(comp)] if comp > 1 else [v]
        for mat in matrices:
            lib.hermi_triu(mat, hermi=hermi, inplace=True)
    return vs
Exemple #25
0
    def test_unpack(self):
        '''Exercise lib.hermi_triu, lib.pack_tril and lib.unpack_tril.

        Covers anti-Hermitian input (hermi=2), Hermitian input (hermi=1),
        a stack of matrices and a fixed-seed fingerprint.
        '''
        n = 400
        mat = numpy.random.random((n,n))
        mat = mat+mat*.5j
        # Strip the imaginary part from the diagonal.
        diag = numpy.arange(n)
        mat[diag,diag] = mat[diag,diag].real
        anti = mat-mat.T.conj()
        pair = numpy.array((anti,anti))

        # hermi=2 on a transposed view must reproduce the input.
        filled = lib.hermi_triu(pair[0].T, hermi=2, inplace=0)
        self.assertAlmostEqual(abs(pair[0].T-filled).max(), 0, 12)

        filled = lib.hermi_triu(pair[1], hermi=2, inplace=0)
        self.assertAlmostEqual(abs(pair[1]-filled).max(), 0, 12)
        roundtrip = lib.unpack_tril(lib.pack_tril(filled), 2)
        self.assertAlmostEqual(abs(filled - roundtrip).max(), 0, 12)

        # hermi=1 must give a matrix equal to its conjugate transpose.
        filled = lib.hermi_triu(mat, hermi=1, inplace=0)
        self.assertAlmostEqual(abs(filled-filled.T.conj()).max(), 0, 12)

        stack = numpy.asarray((filled,filled,filled))
        roundtrip = lib.unpack_tril(lib.pack_tril(stack))
        self.assertAlmostEqual(abs(stack - roundtrip).max(), 0, 12)

        # Fixed-seed fingerprint of unpacking along axis 0.
        numpy.random.seed(1)
        packed = numpy.random.random((5050,20))
        fingerprint = lib.finger(lib.unpack_tril(packed, axis=0))
        self.assertAlmostEqual(fingerprint, -103.03970592075423, 10)
Exemple #26
0
def _get_jk(mol, intor, comp, aosym, script_dms,
            shls_slice=None, cintopt=None):
    '''Run ``_vhf.direct_bindm`` for interleaved (script, dm) pairs.

    ``script_dms`` alternates contraction scripts (even positions) and
    density matrices (odd positions).  Results whose script contains
    's2' (hermi=1) or 'a2' (hermi=2) and whose last two axes are equal
    are completed in place with ``lib.hermi_triu``.

    Returns:
        The results of ``_vhf.direct_bindm``, one per script.
    '''
    intor = mol._add_suffix(intor)
    scripts = script_dms[::2]
    dms = script_dms[1::2]
    vs = _vhf.direct_bindm(intor, aosym, scripts, dms, comp,
                           mol._atm, mol._bas, mol._env,
                           cintopt=cintopt, shls_slice=shls_slice)
    for k, script in enumerate(scripts):
        # Scripts with neither marker are left as they are.
        hermi = 1 if 's2' in script else 2 if 'a2' in script else None
        if hermi is None:
            continue

        v = vs[k]
        shape = v.shape
        if shape[-2] != shape[-1]:
            continue
        matrices = [v[i] for i in range(comp)] if comp > 1 else [v]
        for mat in matrices:
            lib.hermi_triu(mat, hermi=hermi, inplace=True)
    return vs
Exemple #27
0
def intor(mol):
    '''Assemble the (nao, nao) matrix from type1 and type2 shell-pair blocks.

    Only shell pairs with jsh <= ish are evaluated; the remaining triangle
    is filled by lib.hermi_triu before returning.
    '''
    nao = mol.nao_nr()
    mat = numpy.zeros((nao,nao))
    row = 0
    for ish in range(mol.nbas):
        col = 0
        for jsh in range(ish+1):
            shls = (ish, jsh)
            # Accumulate both contributions into the same block.
            for by_shell in (type1_by_shell, type2_by_shell):
                buf = by_shell(mol, shls)
                di, dj = buf.shape
                mat[row:row+di, col:col+dj] += buf
            col += dj
        # di still holds the row dimension of shell ish.
        row += di
    return lib.hermi_triu(mat)
Exemple #28
0
def pspace(h1e, eri, norb, nelec, hdiag=None, np=400):
    '''Build the Hamiltonian block over the lowest-diagonal determinants.

    Args:
        h1e : pair whose entries [0] and [1] are the alpha and beta
            one-electron arrays.
        eri : triple whose entries [0], [1], [2] are restored (with
            ao2mo.restore) to the aa, ab and bb two-electron arrays.
        norb : number of orbitals.
        nelec : electron count specification, unpacked by
            direct_spin1._unpack_nelec.
        hdiag : diagonal of the Hamiltonian; computed with make_hdiag when
            None.
        np : maximum number of determinants to select.  Note that this
            name is the determinant count here, not the numpy module.

    Returns:
        (addr, h0): the addresses of the selected determinants in hdiag and
        the square matrix over them.  The diagonal is taken from hdiag and
        the full matrix is completed with lib.hermi_triu.
    '''
    neleca, nelecb = direct_spin1._unpack_nelec(nelec)
    h1e_a = numpy.ascontiguousarray(h1e[0])
    h1e_b = numpy.ascontiguousarray(h1e[1])
    g2e_aa = ao2mo.restore(1, eri[0], norb)
    g2e_ab = ao2mo.restore(1, eri[1], norb)
    g2e_bb = ao2mo.restore(1, eri[2], norb)
    link_indexa = cistring.gen_linkstr_index_trilidx(range(norb), neleca)
    link_indexb = cistring.gen_linkstr_index_trilidx(range(norb), nelecb)
    nb = link_indexb.shape[0]
    if hdiag is None:
        hdiag = make_hdiag(h1e, eri, norb, nelec)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            # The np smallest diagonal elements (unordered).
            addr = numpy.argpartition(hdiag, np - 1)[:np]
        except AttributeError:
            # Fallback for numpy versions without argpartition.
            addr = numpy.argsort(hdiag)[:np]
    addra = addr // nb
    addrb = addr % nb
    # numpy.long was removed in NumPy 1.24 (and is the platform C long in
    # NumPy 2), so pin the 64-bit width of the buffers handed to C.
    stra = numpy.array([cistring.addr2str(norb, neleca, ia) for ia in addra],
                       dtype=numpy.int64)
    strb = numpy.array([cistring.addr2str(norb, nelecb, ib) for ib in addrb],
                       dtype=numpy.int64)
    np = len(addr)
    h0 = numpy.zeros((np, np))
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(np))

    # Overwrite the diagonal with the supplied hdiag values.
    for i in range(np):
        h0[i, i] = hdiag[addr[i]]
    h0 = lib.hermi_triu(h0)
    return addr, h0
Exemple #29
0
def _jk_triu_(vj, vk, hermi):
    '''Complete vj (and vk when hermi != 0) with lib.hermi_triu.

    vj is always processed, using hermi=1 when ``hermi`` is 0 and ``hermi``
    otherwise.  vk is only processed when ``hermi`` is non-zero.  Inputs
    that are not 2D are handled slice by slice along the first axis and
    written back into the given arrays.
    '''
    touch_vk = hermi != 0
    vj_flag = hermi if touch_vk else 1

    if vj.ndim == 2:
        vj = lib.hermi_triu(vj, vj_flag)
        if touch_vk:
            vk = lib.hermi_triu(vk, hermi)
        return vj, vk

    for idx in range(vj.shape[0]):
        vj[idx] = lib.hermi_triu(vj[idx], vj_flag)
        if touch_vk:
            vk[idx] = lib.hermi_triu(vk[idx], hermi)
    return vj, vk
Exemple #30
0
def _jk_triu_(vj, vk, hermi):
    '''Fill vj, and vk for non-zero ``hermi``, through lib.hermi_triu.

    With ``hermi == 0`` only vj is processed (with flag 1) and vk is
    returned untouched.  Otherwise both are processed with ``hermi``.
    Stacked (non-2D) inputs are updated one leading-axis slice at a time.
    '''
    is_single = vj.ndim == 2

    if hermi == 0:
        if is_single:
            return lib.hermi_triu(vj, 1), vk
        for k in range(vj.shape[0]):
            vj[k] = lib.hermi_triu(vj[k], 1)
        return vj, vk

    if is_single:
        vj = lib.hermi_triu(vj, hermi)
        vk = lib.hermi_triu(vk, hermi)
        return vj, vk

    for k in range(vj.shape[0]):
        vj[k] = lib.hermi_triu(vj[k], hermi)
        vk[k] = lib.hermi_triu(vk[k], hermi)
    return vj, vk
Exemple #31
0
def runjks2(dm1, ncomp, intorname, filldot, *namejk):
    '''Call runjk and pass every result through lib.hermi_triu(v, 1).

    Returns a list with one completed matrix per runjk output.
    '''
    completed = []
    for v in runjk(dm1, ncomp, intorname, filldot, *namejk):
        completed.append(lib.hermi_triu(v, 1))
    return completed
Exemple #32
0
    def test_direct_jk_s2(self):
        '''Compare the s2-filled direct J/K kernels against reference J/K.'''
        numpy.random.seed(15)

        def check(expected, actual):
            self.assertTrue(numpy.allclose(expected, actual))

        # Symmetric random density matrix.
        dm1 = numpy.random.random((nao, nao))
        dm1 = dm1 + dm1.T

        # 8-fold symmetry kernels against the incore reference.
        ref_j, ref_k = scf._vhf.incore(rhf._eri, dm1, 1)
        got_j, got_k = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs8",
                               "CVHFnrs8_ji_s2kl", "CVHFnrs8_jk_s2il")
        check(ref_j, got_j)
        check(ref_k, got_k)

        # From here on the references are explicit einsum contractions.
        eri1 = ao2mo.restore(1, rhf._eri, nao)
        ref_j = numpy.einsum("ijkl,kl->ij", eri1, dm1)
        ref_k = numpy.einsum("ijkl,jk->il", eri1, dm1)

        # 4-fold symmetry kernels: one J + one K, then two K variants.
        got_j, got_k = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                               "CVHFnrs4_ji_s2kl", "CVHFnrs4_jk_s2il")
        check(ref_j, got_j)
        check(ref_k, got_k)

        got_k1, got_k2 = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                                 "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il")
        check(ref_k, got_k1)
        check(ref_k, got_k2)

        # Four exchange kernels evaluated in a single call.
        results = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_nrs4",
                          "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il",
                          "CVHFnrs4_li_s2kj", "CVHFnrs4_jk_s2il")
        for idx in range(4):
            check(ref_k, results[idx])

        # Lower-symmetry kernels: two J variants followed by two K variants.
        for sym in ("nrs2kl", "nrs2ij", "nrs1"):
            results = runjks2(dm1, 1, "cint2e_sph", "CVHFdot_" + sym,
                              "CVHF" + sym + "_ji_s2kl",
                              "CVHF" + sym + "_lk_s2ij",
                              "CVHF" + sym + "_jk_s2il",
                              "CVHF" + sym + "_li_s2kj")
            check(ref_j, results[0])
            check(ref_j, results[1])
            check(ref_k, results[2])
            check(ref_k, results[3])

        # Same as the first check, but filling the triangles by hand.
        ref_j, ref_k = scf._vhf.incore(rhf._eri, dm1, 1)
        raw_j, raw_k = runjk(dm1, 1, "cint2e_sph", "CVHFdot_nrs8",
                             "CVHFnrs8_ji_s2kl", "CVHFnrs8_jk_s2il")
        check(ref_j, lib.hermi_triu(raw_j, 1))
        check(ref_k, lib.hermi_triu(raw_k, 1))
Exemple #33
0
def get_jk_favorj(sgx,
                  dm,
                  hermi=1,
                  with_j=True,
                  with_k=True,
                  direct_scf_tol=1e-13):
    '''Build J and K matrices on the sgx grids, block by block.

    Args:
        sgx : object providing mol, grids, grids_thrd, debug, max_memory
            and blockdim.
        dm : density matrix or stack of density matrices; reshaped to
            (-1, nao, nao) internally.
        hermi : when 1 (and with_k), vk is symmetrized on return.
        with_j, with_k : whether to accumulate J / K.
        direct_scf_tol : forwarded to _gen_jk_direct.

    Returns:
        (vj, vk), both reshaped to the shape of ``dm``.  A matrix that was
        not requested is returned as zeros.

    Side effects:
        Sets ``mol._bvv`` (a list of shell indices) for every grid block in
        which screening finds any candidates.
    '''
    # time.clock() was removed in Python 3.8; process_time() is the CPU
    # clock replacement.
    # NOTE(review): assumes the logger timers accept these clock values.
    t0 = time.process_time(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol)

    # for basis set to shell
    intor = mol._add_suffix('int3c2e')
    fakemol = gto.fakemol_for_charges(grids.coords)
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      fakemol._atm, fakemol._bas, fakemol._env)
    ao_loc = moleintor.make_loc(bas, intor)
    # rao_loc[j] is the index of the shell that basis function j belongs to.
    rao_loc = numpy.zeros((nao), dtype=int)
    for i in range(mol.nbas):
        for j in range(ao_loc[i], ao_loc[i + 1]):
            rao_loc[j] = i

    # Overlap matrix integrated numerically on the grid.
    sn = numpy.zeros((nao, nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk,
                                         max_memory * 1e6 / 8 / nao**2))))
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]
        sn += lib.dot(ao.T, wao)

    # proj solves sn @ proj = ovlp; it is applied to the density matrices.
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            gmaxfg = numpy.amax(numpy.absolute(fg[i]), axis=1)
            gmaxwao_v = numpy.amax(numpy.absolute(ao), axis=1)
            gmaxtt = gmaxfg * gmaxwao_v
            # NOTE(review): numpy.any without an axis yields one scalar, so
            # this marks either every grid point or none; and gmaxtt is a
            # product of absolute values, so the second test cannot fire.
            # Confirm whether a per-point mask was intended.
            mask |= numpy.any(gmaxtt > 1e-7)
            mask |= numpy.any(gmaxtt < -1e-7)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        # screening u by value of grids
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            udms = proj_dm[0][usi, :]
            # screening v by dm and ovlp then triangle matrix bn
            dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
            dsi = numpy.argwhere(dmaxg > 1e-4).reshape(-1)
            vsi = numpy.intersect1d(dsi, osi)
            if len(vsi) != 0:
                vsh = numpy.unique(rao_loc[vsi])
                mol._bvv = vsh

        # screening u by value of grids
        # NOTE(review): whenever the block above sets mol._bvv, this block
        # overwrites it (vsi is a subset of osi), so only the overlap-based
        # screening takes effect.
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            if len(osi) != 0:
                vsh = numpy.unique(rao_loc[osi])
                mol._bvv = vsh

        # Per-point screening of fg against the grid threshold.
        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i] > gthrd, axis=1)
            mask |= numpy.any(fg[i] < -gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        # tnuc accumulates the (CPU, wall) time spent in the integral calls.
        if sgx.debug:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            jpart, gv = batch_jk(mol, coords, rhog, fg)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        # Release the per-block intermediates before the next iteration.
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(
        sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
        'for tensor contraction (%.2f, %.2f)', tnuc[0], tnuc[1], tdot[0],
        tdot[1])

    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1)) * .5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
Exemple #34
0
def intor_cross(intor, cell1, cell2, comp=1, hermi=0, kpts=None, kpt=None):
    r'''1-electron integrals between the basis functions of two cells

    .. math::

        \langle \mu | intor | \nu \rangle, \mu \in cell1, \nu \in cell2

    ``kpts`` takes precedence over ``kpt``; with neither given, a single
    zero k-point is used.  A list with one matrix per k-point is returned,
    unless ``kpts`` is None or has shape (3,), in which case the single
    matrix is returned directly.  Matrices at a (numerically) zero k-point
    are reduced to their real part.
    '''
    intor = moleintor.ascint3(intor)
    if kpts is not None:
        kpts_lst = np.reshape(kpts, (-1, 3))
    elif kpt is not None:
        kpts_lst = np.reshape(kpt, (1, 3))
    else:
        kpts_lst = np.zeros((1, 3))
    nkpts = len(kpts_lst)

    # Concatenate both cells so that cell2's shells follow cell1's.
    atm, bas, env = conc_env(cell1._atm, cell1._bas, cell1._env,
                             cell2._atm, cell2._bas, cell2._env)
    atm = np.asarray(atm, dtype=np.int32)
    bas = np.asarray(bas, dtype=np.int32)
    env = np.asarray(env, dtype=np.double)
    natm = len(atm)
    nbas = len(bas)
    shls_slice = (0, cell1.nbas, cell1.nbas, nbas)
    ao_loc = moleintor.make_loc(bas, intor)
    i_start, i_stop, j_start, j_stop = shls_slice
    ni = ao_loc[i_stop] - ao_loc[i_start]
    nj = ao_loc[j_stop] - ao_loc[j_start]
    out = np.empty((nkpts, comp, ni, nj), dtype=np.complex128)

    aosym = 's1' if hermi == 0 else 's2'
    fill = getattr(libpbc, 'PBCnr2c_fill_k' + aosym)
    fintor = getattr(moleintor.libcgto, intor)
    intopt = lib.c_null_ptr()

    Ls = cell1.get_lattice_Ls(rcut=max(cell1.rcut, cell2.rcut))
    # Phase factors exp(i k.L) for every k-point / lattice vector pair.
    expkL = np.asarray(np.exp(1j * np.dot(kpts_lst, Ls.T)), order='C')

    as_ptr = ctypes.c_void_p
    drv = libpbc.PBCnr2c_drv
    drv(fintor, fill, out.ctypes.data_as(as_ptr),
        ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(len(Ls)),
        Ls.ctypes.data_as(as_ptr),
        expkL.ctypes.data_as(as_ptr),
        (ctypes.c_int * 4)(*(shls_slice[:4])),
        ao_loc.ctypes.data_as(as_ptr), intopt,
        atm.ctypes.data_as(as_ptr), ctypes.c_int(natm),
        bas.ctypes.data_as(as_ptr), ctypes.c_int(nbas),
        env.ctypes.data_as(as_ptr))

    mat = []
    for k, kvec in enumerate(kpts_lst):
        v = out[k]
        if hermi != 0:
            for ic in range(comp):
                lib.hermi_triu(v[ic], hermi=hermi, inplace=True)
        if comp == 1:
            v = v[0]
        if abs(kvec).sum() < 1e-9:  # gamma_point
            v = v.real
        mat.append(v)

    single_kpt = kpts is None or np.shape(kpts) == (3, )
    if single_kpt:
        mat = mat[0]
    return mat
def runjks2(dm1, ncomp, intorname, filldot, *namejk):
    '''Evaluate runjk, then apply lib.hermi_triu(v, 1) to each output.

    The completed matrices are returned as a list in runjk's output order.
    '''
    raw_outputs = runjk(dm1, ncomp, intorname, filldot, *namejk)
    filled = []
    for v in raw_outputs:
        filled.append(lib.hermi_triu(v, 1))
    return filled
Exemple #36
0
def make_hdiag_csf_slower (h1e, eri, norb, nelec, transformer, hdiag_det=None):
    ''' Diagonal of the Hamiltonian in the CSF basis.

    This is tricky because the diagonal blocks for each configuration are
    needed in order to get the correct csf hdiag values, not just the
    diagonal elements for each determinant.

    Args:
        h1e, eri : one- and two-electron integrals; eri is restored with
            ao2mo.restore (1, eri, norb).
        norb : number of orbitals.
        nelec : electron counts, unpacked with _unpack_nelec.
        transformer : object providing ``smult`` and ``csd_mask``.
        hdiag_det : diagonal in the determinant basis; built with
            make_hdiag_det when None.

    Returns:
        1D float64 array with one entry per CSF.

    Raises:
        AssertionError: if some CSF entry was never assigned.
    '''
    smult = transformer.smult
    # Profiling counters; only read by the commented-out prints at the end.
    t0, w0 = time.process_time (), time.time ()
    tstr = tlib = tloop = wstr = wlib = wloop = 0
    if hdiag_det is None:
        hdiag_det = make_hdiag_det (None, h1e, eri, norb, nelec)
    eri = ao2mo.restore(1, eri, norb)
    neleca, nelecb = _unpack_nelec (nelec)
    min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size, npair_sdet_size = get_csdaddrs_shape (norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape (norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = min (neleca, nelecb)
    ncsf_all = count_all_csfs (norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings(norb, nelecb)
    hdiag_csf = np.ascontiguousarray (np.zeros (ncsf_all, dtype=np.float64))
    # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
    # Entries are flipped to False as soon as the CSF value is assigned.
    hdiag_csf_check = np.ones (ncsf_all, dtype=bool)
    for npair in range (min_npair, max_npair+1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2*npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        hdiag_conf = np.ascontiguousarray (np.zeros ((nconf, ndet, ndet), dtype=np.float64))
        det_addr = transformer.csd_mask[csd_offset:][:nconf*ndet]
        if ndet == 1:
            # Closed-shell singlets
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue
        umat = get_spin_evecs (nspin, neleca, nelecb, smult)
        det_addra, det_addrb = divmod (det_addr, ndetb_all)
        t1, w1 = time.process_time (), time.time ()
        det_stra = cistring.addrs2str (norb, neleca, det_addra).reshape (nconf, ndet, order='C')
        det_strb = cistring.addrs2str (norb, nelecb, det_addrb).reshape (nconf, ndet, order='C')
        tstr += time.process_time () - t1
        wstr += time.time () - w1
        det_addr = det_addr.reshape (nconf, ndet, order='C')
        diag_idx = np.diag_indices (ndet)
        # It looks like the library call below is, itself, usually responsible for about 50% of the
        # clock and wall time that this function consumes.
        t1, w1 = time.process_time (), time.time ()
        for iconf in range (nconf):
            addr = det_addr[iconf]
            assert (len (addr) == ndet)
            stra = det_stra[iconf]
            strb = det_strb[iconf]
            t2, w2 = time.process_time (), time.time ()
            libfci.FCIpspace_h0tril(hdiag_conf[iconf].ctypes.data_as(ctypes.c_void_p),
                h1e.ctypes.data_as(ctypes.c_void_p),
                eri.ctypes.data_as(ctypes.c_void_p),
                stra.ctypes.data_as(ctypes.c_void_p),
                strb.ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(norb), ctypes.c_int(ndet))
            tlib += time.process_time () - t2
            wlib += time.time () - w2
        # Complete each block from its computed triangle, then overwrite
        # the diagonal with the determinant-basis values.
        for iconf in range (nconf):
            hdiag_conf[iconf] = lib.hermi_triu (hdiag_conf[iconf])
        for iconf in range (nconf):
            hdiag_conf[iconf][diag_idx] = hdiag_det[det_addr[iconf]]
        tloop += time.process_time () - t1
        wloop += time.time () - w1

        # sum_j (H @ U)[c,j,k] * U[j,k]: diagonal of U^T H U per configuration.
        hdiag_conf = np.tensordot (hdiag_conf, umat, axes=1)
        hdiag_conf = (hdiag_conf * umat[np.newaxis,:,:]).sum (1)
        hdiag_csf[csf_offset:][:nconf*ncsf] = hdiag_conf.ravel (order='C')
        hdiag_csf_check[csf_offset:][:nconf*ncsf] = False
    assert (np.count_nonzero (hdiag_csf_check) == 0), np.count_nonzero (hdiag_csf_check)
    #print ("Total time in hdiag_csf: {}, {}".format (time.process_time () - t0, time.time () - w0))
    #print ("    Loop: {}, {}".format (tloop, wloop))
    #print ("    Library: {}, {}".format (tlib, wlib))
    #print ("    Cistring: {}, {}".format (tstr, wstr))
    return hdiag_csf
Exemple #37
0
def _eval_jk(mf, dm, hermi, gen_jobs):
    '''Evaluate the recipes of every job and accumulate the results.

    ``gen_jobs(ngroups, hermi)`` returns the job list; each job holds a
    tuple of basis-group ids followed by one or more recipes.  Jobs are
    distributed with mpi.work_stealing_partition and the per-process
    results are combined with mpi.reduce.

    Returns an array of shape (n_recipes, n_dm, nao, nao).  On ranks other
    than 0 it is zeroed before being returned.
    '''
    cpu0 = (logger.process_clock(), logger.perf_counter())
    mol = mf.mol
    ao_loc = mol.ao_loc_nr()
    nao = ao_loc[-1]

    bas_groups = _partition_bas(mol)
    jobs = gen_jobs(len(bas_groups), hermi)
    njobs = len(jobs)
    logger.debug1(mf, 'njobs %d', njobs)

    # Each job has multiple recipes.
    n_recipes = len(jobs[0][1:])
    dm = numpy.asarray(dm).reshape(-1, nao, nao)
    n_dm = dm.shape[0]
    vk = numpy.zeros((n_recipes, n_dm, nao, nao))

    vhfopt = mf.init_direct_scf(mol) if mf.opt is None else mf.opt
    # Assign the entire dm_cond to vhfopt.
    # The prescreen function CVHFnrs8_prescreen will index q_cond and dm_cond
    # over the entire basis.  "set_dm" in function jk.get_jk/direct_bindm only
    # creates a subblock of dm_cond which is not compatible with
    # CVHFnrs8_prescreen.
    vhfopt.set_dm(dm, mol._atm, mol._bas, mol._env)
    # Then skip the "set_dm" initialization in function jk.get_jk/direct_bindm.
    vhfopt._dmcondname = None

    logger.timer_debug1(mf, 'get_jk initialization', *cpu0)
    for job_id in mpi.work_stealing_partition(range(njobs)):
        job = jobs[job_id]
        group_ids = job[0]
        recipes = job[1:]

        shls_slice = lib.flatten([bas_groups[g] for g in group_ids])
        # loc[n] is the (start, stop) AO range of the n-th index of (ij|kl).
        loc = ao_loc[shls_slice].reshape(4, 2)

        # Input blocks, ordered density matrix -> recipe -> entry.
        dm_blks = []
        for density in dm:
            for recipe in recipes:
                for rec in recipe:
                    p0, p1 = loc[rec[0]]
                    q0, q1 = loc[rec[1]]
                    dm_blks.append(density[p0:p1, q0:q1])

        # One contraction script per entry, repeated for every density matrix.
        per_dm_scripts = []
        for recipe in recipes:
            for rec in recipe:
                per_dm_scripts.append(
                    'ijkl,%s%s->%s%s' % tuple(['ijkl'[x] for x in rec]))
        scripts = per_dm_scripts * n_dm

        kparts = jk.get_jk(mol,
                           dm_blks,
                           scripts,
                           shls_slice=shls_slice,
                           vhfopt=vhfopt)

        # Scatter the results back, consuming kparts in the same order.
        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for i, rec in enumerate(recipe):
                    p0, p1 = loc[rec[2]]
                    q0, q1 = loc[rec[3]]
                    vk[ir, i_dm, p0:p1, q0:q1] += kparts[i]
                # Pop the results of one recipe
                kparts = kparts[i + 1:]

    vk = mpi.reduce(vk)
    if rank != 0:
        # Zero out vk on workers. If reduce(get_jk()) is called twice,
        # non-zero vk on workers can cause error.
        vk[:] = 0
    elif hermi:
        for ir in range(n_recipes):
            for i_dm in range(n_dm):
                lib.hermi_triu(vk[ir, i_dm], hermi, inplace=True)
    logger.timer(mf, 'get_jk', *cpu0)
    return vk
Exemple #38
0
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False):
    '''Direct J/K build through the CVHFnr_direct_drv driver.

    Returns an array ``vjk`` where vjk[0] holds J and vjk[1] holds K for
    every density matrix; the density-matrix axis is dropped when exactly
    one matrix was processed.  J is always completed with hermi_triu
    (flag 1); K is completed only when ``hermi`` is non-zero.
    '''
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    # Promote a single 2D matrix to a stack of one.
    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = dms[numpy.newaxis, :, :]
    n_dm = len(dms)
    nao = dms[0].shape[0]
    dms = numpy.asarray(dms, order='C')

    if vhfopt is not None:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    else:
        intor = 'int2e_cart' if cart else 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    cintor = _fpointer(intor)

    fdrv = libcvhf.CVHFnr_direct_drv
    fdot = _fpointer('CVHFdot_nrs8')
    fvj = _fpointer('CVHFnrs8_ji_s2kl')
    # The s2 kernel fills one triangle only, which is valid for hermi == 1.
    fvk = _fpointer('CVHFnrs8_li_s2kj' if hermi == 1 else 'CVHFnrs8_li_s1kj')

    vjk = numpy.empty((2, n_dm, nao, nao))
    n_ops = 2 * n_dm
    fjk = (ctypes.c_void_p * n_ops)()
    dmsptr = (ctypes.c_void_p * n_ops)()
    vjkptr = (ctypes.c_void_p * n_ops)()
    # First n_dm slots are the J operations, the next n_dm the K operations.
    for op, kernel in enumerate((fvj, fvk)):
        for i in range(n_dm):
            slot = op * n_dm + i
            dmsptr[slot] = dms[i].ctypes.data_as(ctypes.c_void_p)
            vjkptr[slot] = vjk[op, i].ctypes.data_as(ctypes.c_void_p)
            fjk[slot] = kernel
    shls_slice = (ctypes.c_int * 8)(*([0, c_bas.shape[0]] * 4))
    ao_loc = make_loc(bas, intor)

    fdrv(cintor, fdot, fjk, dmsptr, vjkptr,
         ctypes.c_int(n_ops), ctypes.c_int(1),
         shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p),
         cintopt, cvhfopt,
         c_atm.ctypes.data_as(ctypes.c_void_p), natm,
         c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
         c_env.ctypes.data_as(ctypes.c_void_p))

    # vj must be symmetric
    for idm in range(n_dm):
        vjk[0, idm] = lib.hermi_triu(vjk[0, idm], 1)
    # vk depends
    if hermi != 0:
        for idm in range(n_dm):
            vjk[1, idm] = lib.hermi_triu(vjk[1, idm], hermi)

    if n_dm == 1:
        vjk = vjk.reshape(2, nao, nao)
    return vjk
Exemple #39
0
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False,
           with_j=True, with_k=True):
    '''Direct J and/or K build through the CVHFnr_direct_drv driver.

    Returns ``(vj, vk)``, each reshaped to the shape of ``dms``; a matrix
    that was not requested is None.  J is always completed with hermi_triu
    (flag 1); K is completed only when ``hermi`` is non-zero.
    '''
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao = dms_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    if vhfopt is not None:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    else:
        intor = 'int2e_cart' if cart else 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    cintor = _fpointer(intor)

    fdrv = libcvhf.CVHFnr_direct_drv
    fdot = _fpointer('CVHFdot_nrs8')

    vj = vk = None
    dmsptr, vjkptr, fjk = [], [], []

    def _queue(kernel, outputs):
        # One driver operation per density matrix, writing into outputs[i].
        for i, dm in enumerate(dms):
            dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
            vjkptr.append(outputs[i].ctypes.data_as(ctypes.c_void_p))
            fjk.append(kernel)

    if with_j:
        fvj = _fpointer('CVHFnrs8_ji_s2kl')
        vj = numpy.empty((n_dm, nao, nao))
        _queue(fvj, vj)

    if with_k:
        # The s2 kernel fills one triangle only, valid for hermi == 1.
        k_name = 'CVHFnrs8_li_s2kj' if hermi == 1 else 'CVHFnrs8_li_s1kj'
        fvk = _fpointer(k_name)
        vk = numpy.empty((n_dm, nao, nao))
        _queue(fvk, vk)

    shls_slice = (ctypes.c_int * 8)(*([0, c_bas.shape[0]] * 4))
    ao_loc = make_loc(bas, intor)
    n_ops = len(dmsptr)
    comp = 1
    ptr_array = ctypes.c_void_p * n_ops
    fdrv(cintor, fdot, ptr_array(*fjk), ptr_array(*dmsptr),
         ptr_array(*vjkptr), ctypes.c_int(n_ops), ctypes.c_int(comp),
         shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p),
         cintopt, cvhfopt,
         c_atm.ctypes.data_as(ctypes.c_void_p), natm,
         c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
         c_env.ctypes.data_as(ctypes.c_void_p))

    if with_j:
        # vj must be symmetric
        for idm in range(n_dm):
            lib.hermi_triu(vj[idm], 1, inplace=True)
        vj = vj.reshape(dms_shape)
    if with_k:
        if hermi != 0:
            for idm in range(n_dm):
                lib.hermi_triu(vk[idm], hermi, inplace=True)
        vk = vk.reshape(dms_shape)
    return vj, vk
Exemple #40
0
def get_jk(mols, dms, scripts=['ijkl,ji->kl'], intor='int2e_sph',
           aosym='s1', comp=None, hermi=0, shls_slice=None,
           verbose=logger.WARN, vhfopt=None):
    '''Compute J/K matrices for the given density matrix

    Args:
        mols : an instance of :class:`Mole` or a list of `Mole` objects
            When a list (or tuple) is given it must hold exactly four
            objects, one per index of (ij|kl).

        dms : ndarray or list of ndarrays
            A density matrix or a list of density matrices

    Kwargs:
        hermi : int
            Whether the returned J (K) matrix is hermitian

            | 0 : no hermitian or symmetric
            | 1 : hermitian
            | 2 : anti-hermitian

        intor : str
            2-electron integral name.  See :func:`getints` for the complete
            list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry (default)
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        scripts : string or a list of strings
            Contraction description (following numpy.einsum convention) based on
            letters [ijkl].  Each script will be one-to-one applied to each
            entry of dms.  So it must have the same number of elements as the
            dms, len(scripts) == len(dms).
        shls_slice : 8-element list
            (ish_start, ish_end, jsh_start, jsh_end, ksh_start, ksh_end, lsh_start, lsh_end)
            The caller's object is never modified.

    Returns:
        Depending on the number of density matrices, the function returns one
        J/K matrix or a list of J/K matrices (the same number of entries as the
        input dms).
        Each JK matrices may be a 2D array or 3D array if the AO integral
        has multiple components.

    Examples:

    >>> from pyscf import gto
    >>> mol = gto.M(atom='H 0 -.5 0; H 0 .5 0', basis='cc-pvdz')
    >>> nao = mol.nao_nr()
    >>> dm = numpy.random.random((nao,nao))
    >>> # Default, Coulomb matrix
    >>> vj = get_jk(mol, dm)
    >>> # Coulomb matrix with 8-fold permutation symmetry for AO integrals
    >>> vj = get_jk(mol, dm, 'ijkl,ji->kl', aosym='s8')
    >>> # Exchange matrix with 8-fold permutation symmetry for AO integrals
    >>> vk = get_jk(mol, dm, 'ijkl,jk->il', aosym='s8')
    >>> # Compute coulomb and exchange matrices together
    >>> vj, vk = get_jk(mol, (dm,dm), ('ijkl,ji->kl','ijkl,li->kj'), aosym='s8')
    >>> # Analytical gradients for coulomb matrix
    >>> j1 = get_jk(mol, dm, 'ijkl,lk->ij', intor='int2e_ip1_sph', aosym='s2kl', comp=3)

    >>> # contraction across two molecules
    >>> mol1 = gto.M(atom='He 2 0 0', basis='6-31g')
    >>> nao1 = mol1.nao_nr()
    >>> dm1 = numpy.random.random((nao1,nao1))
    >>> # Coulomb interaction between two molecules, note 4-fold symmetry can be applied
    >>> jcross = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', aosym='s4')
    >>> ecoul = numpy.einsum('ij,ij', jcross, dm1)
    >>> # Exchange interaction between two molecules, no symmetry can be used
    >>> kcross = get_jk((mol1,mol,mol,mol1), dm, scripts='ijkl,jk->il')
    >>> ex = numpy.einsum('ij,ji', kcross, dm1)

    >>> # Analytical gradients for coulomb matrix between two molecules
    >>> jcros1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3)
    >>> # Analytical gradients for coulomb interaction between 1s density and the other molecule
    >>> jpart1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3,
    ...                 shls_slice=(0,1,0,1,0,mol.nbas,0,mol.nbas))
    '''
    if isinstance(mols, (tuple, list)):
        intor, comp = gto.moleintor._get_intor_and_comp(mols[0]._add_suffix(intor), comp)
        assert(len(mols) == 4)
        assert(mols[0].cart == mols[1].cart == mols[2].cart == mols[3].cart)
        if shls_slice is None:
            shls_slice = numpy.array([(0, mol.nbas) for mol in mols])
        else:
            # numpy.array (not asarray) so that the in-place offsets added
            # below never leak into an ndarray owned by the caller.
            shls_slice = numpy.array(shls_slice).reshape(4,2)
        # concatenate unique mols and build corresponding shls_slice
        mol_ids = [id(mol) for mol in mols]
        atm, bas, env = mols[0]._atm, mols[0]._bas, mols[0]._env
        bas_start = numpy.zeros(4, dtype=int)
        for m in range(1,4):
            first = mol_ids.index(mol_ids[m])
            if first == m:  # the unique mol, not repeated in mols
                bas_start[m] = bas.shape[0]
                atm, bas, env = gto.conc_env(atm, bas, env, mols[m]._atm,
                                             mols[m]._bas, mols[m]._env)
            else:
                # Repeated mol: reuse the shell offset of its first occurrence.
                bas_start[m] = bas_start[first]
            shls_slice[m] += bas_start[m]
        shls_slice = shls_slice.flatten()
    else:
        intor, comp = gto.moleintor._get_intor_and_comp(mols._add_suffix(intor), comp)
        atm, bas, env = mols._atm, mols._bas, mols._env
        if shls_slice is None:
            shls_slice = (0, mols.nbas) * 4

    single_script = isinstance(scripts, str)
    if single_script:
        scripts = [scripts]
    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = [dms]
    assert(len(scripts) == len(dms))

    #format scripts
    descript = []
    for script in scripts:
        dmsym, vsym = script.lower().split(',')[1].split('->')
        if vsym[:2] in ('a2', 's2', 's1'):
            # The output symmetry is already spelled out in the script.
            descript.append(dmsym + '->' + vsym)
        elif hermi == 0:
            descript.append(dmsym + '->s1' + vsym)
        else:
            descript.append(dmsym + '->s2' + vsym)

    vs = _vhf.direct_bindm(intor, aosym, descript, dms, comp, atm, bas, env,
                           vhfopt=vhfopt, shls_slice=shls_slice)
    if hermi != 0:
        for v in vs:
            if v.ndim == 3:
                for vi in v:
                    lib.hermi_triu(vi, hermi, inplace=True)
            else:
                lib.hermi_triu(v, hermi, inplace=True)

    if single_script:
        vs = vs[0]
    return vs
Exemple #41
0
def get_jk_favorj(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    '''Build J and K matrices on the numerical grid held by ``sgx``.

    The grid is processed in blocks.  A numerical overlap matrix is first
    accumulated over all grid points and used, together with the analytical
    overlap, to form a projector that is applied to the density matrices
    before the per-block integral contractions.

    Args:
        sgx : object providing ``mol``, ``grids``, ``grids_thrd``, ``debug``,
            ``_opt``, ``max_memory`` and ``blockdim``.
        dm : a density matrix or a stack of density matrices.  It is
            reshaped internally to ``(-1, nao, nao)``.

    Kwargs:
        hermi : int
            When equal to 1, the exchange matrices are symmetrized before
            being returned.
        with_j, with_k : bool
            Whether the Coulomb / exchange contributions are accumulated.
            The corresponding output is left as zeros otherwise.
        direct_scf_tol : float
            Forwarded to ``_gen_jk_direct`` (unused when ``sgx.debug``).

    Returns:
        ``(vj, vk)``, each reshaped to the shape of ``dm``.
    '''
    # time.clock() was removed in Python 3.8; process_time() is the
    # replacement for measuring CPU time.
    t0 = time.process_time(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]

    # Debug mode contracts explicit per-grid-point integrals; otherwise a
    # direct J/K kernel is used.
    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)

    # Numerical overlap matrix accumulated over the whole grid.
    sn = numpy.zeros((nao,nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1,None]
        sn += lib.dot(ao.T, wao)

    # proj solves sn * proj = ovlp; it is applied to the density matrices.
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    # Accumulated (CPU, wall) time spent in the integral kernels.
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1,None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        # Keep only grid points where some element of fg exceeds the
        # threshold in magnitude for at least one density matrix.
        mask = numpy.zeros(i1-i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i]>gthrd, axis=1)
            mask |= numpy.any(fg[i]<-gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:,mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        if sgx.debug:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            if with_j: rhog = rhog.copy()
            jpart, gv = batch_jk(mol, coords, rhog, fg.copy())
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        # Drop references to the block intermediates before the next block.
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0] , t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])

    # vj is completed in place by hermi_triu regardless of ``hermi``
    # (presumably because the J kernel only fills one triangle -- the kernel
    # is requested with 's2' above).
    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0,2,1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
Exemple #42
0
def make_h1(mf,
            mo_coeff,
            mo_occ,
            chkfile=None,
            atmlst=None,
            verbose=logger.WARN):
    '''Assemble, for each atom in ``atmlst``, the matrix ``h1ao + veff``
    built from derivative integrals and the XC contribution on ``mf.grids``.

    Args:
        mf : mean-field object providing ``mol``, ``_numint``, ``xc``,
            ``grids``, ``stdout`` and ``verbose``.
        mo_coeff : 2D array of orbital coefficients, shape ``(nao, nmo)``.
        mo_occ : occupation numbers; orbitals with ``mo_occ > 0`` define the
            density matrix ``dm0``.

    Kwargs:
        chkfile : if given, each per-atom result is saved with
            ``lib.chkfile.save`` under the key ``'scf_h1ao/<atom id>'``
            instead of being collected in memory.
        atmlst : atom indices to process; defaults to all atoms of ``mol``.
        verbose : only inspected to see whether it already is a
            ``logger.Logger``.

    Returns:
        The list of per-atom arrays when ``chkfile`` is None, otherwise
        ``chkfile`` itself.

    Raises:
        NotImplementedError: when the functional type is neither 'LDA' nor
            'GGA'.
    '''
    # NOTE(review): ``log`` is created but never used below.
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mf.stdout, mf.verbose)
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao, nmo = mo_coeff.shape
    mocc = mo_coeff[:, mo_occ > 0]
    # Density matrix from the occupied orbitals (factor 2 applied here).
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching ``libxc`` does not modify
    # ``mf._numint`` itself.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to libxc when xcfun cannot handle this functional.
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    # Hard-coded value passed to ni.block_loop as ``max_memory``.
    max_memory = 4000

    h1a = -(mol.intor('int1e_ipkin', comp=3) +
            mol.intor('int1e_ipnuc', comp=3))

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    # NOTE(review): the enumerate index ``i0`` is not used in the loop body.
    for i0, ia in enumerate(atmlst):
        # Shell range [shl0, shl1) and AO range [p0, p1) of atom ``ia``.
        shl0, shl1, p0, p1 = offsetdic[ia]

        # One-electron part: 1/r integrals centred on atom ``ia`` plus the
        # rows of ``h1a`` belonging to this atom's AOs, then symmetrized.
        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('int1e_iprinv', comp=3)
        h1ao[:, p0:p1] += h1a[:, p0:p1]
        h1ao = h1ao + h1ao.transpose(0, 2, 1)

        # First index restricted to the shells of atom ``ia``; the other
        # three run over all shells.
        shls_slice = (shl0, shl1) + (0, mol.nbas) * 3
        int2e_ip1 = mol._add_suffix('int2e_ip1')
        if abs(hyb) > 1e-10:
            # Hybrid functional: Coulomb and exchange pieces are both needed.
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm(int2e_ip1, 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            # Return value ignored: this relies on hermi_triu updating
            # vj1[i] in place by default -- TODO confirm against lib.
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb * .5 * vk1
            veff[:, p0:p1] += vj2 - hyb * .5 * vk2
        else:
            # Pure functional: only the Coulomb pieces.
            vj1, vj2 = \
                    _vhf.direct_bindm(int2e_ip1, 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            # ``veff`` aliases ``vj1`` here, so the update below modifies
            # vj1 in place.
            veff = vj1
            veff[:, p0:p1] += vj2

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                # rho1: contraction of the AO derivatives on atom ``ia``
                # (ao[1:], columns p0:p1) with ao[0] @ dm0[:, p0:p1].
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:, :, p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight * frr * rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:, :, p0:p1],
                                    weight * vrho)
                aow[:, :, p0:p1] += aow1
                veff[0] += lib.dot(-aow[0].T, ao[0])
                veff[1] += lib.dot(-aow[1].T, ao[0])
                veff[2] += lib.dot(-aow[2].T, ao[0])
                # Drop references to the large intermediates.
                half = aow = aow1 = None

        elif xctype == 'GGA':

            def get_wv(rho, rho1, weight, vxc, fxc):
                # Combine the XC derivatives (vxc, fxc) with the density
                # ``rho`` and its response ``rho1`` into a (4, ngrid) weight
                # array, scaled by the grid weights.
                vgamma = vxc[1]
                frr, frg, fgg = fxc[:3]
                ngrid = weight.size
                sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
                wv = numpy.empty((4, ngrid))
                wv[0] = frr * rho1[0]
                wv[0] += frg * sigma1 * 2
                wv[1:] = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
                wv[1:] += vgamma * rho1[1:] * 2
                wv *= weight
                return wv

            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # XX, XY, ... ZZ are names defined outside this block;
                # presumably the indices of the second-derivative components
                # in ``ao`` -- TODO confirm.
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0[:, p0:p1].copy())
                rho1X = numpy.einsum('xpi,pi->xp', ao[[1, XX, XY, XZ], :,
                                                      p0:p1], half)
                rho1Y = numpy.einsum('xpi,pi->xp', ao[[2, YX, YY, YZ], :,
                                                      p0:p1], half)
                rho1Z = numpy.einsum('xpi,pi->xp', ao[[3, ZX, ZY, ZZ], :,
                                                      p0:p1], half)
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                half = lib.dot(ao[1], dm0[:, p0:p1].copy())
                rho1X[1] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[1] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[1] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[2], dm0[:, p0:p1].copy())
                rho1X[2] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[2] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[2] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)
                half = lib.dot(ao[3], dm0[:, p0:p1].copy())
                rho1X[3] += numpy.einsum('pi,pi->p', ao[1, :, p0:p1], half)
                rho1Y[3] += numpy.einsum('pi,pi->p', ao[2, :, p0:p1], half)
                rho1Z[3] += numpy.einsum('pi,pi->p', ao[3, :, p0:p1], half)

                # Response contributions for the three Cartesian components.
                # wv[0] is halved before the symmetrizing transpose_sum.
                wv = get_wv(rho, rho1X, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Y, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[1] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))
                wv = get_wv(rho, rho1Z, weight, vxc, fxc)
                wv[0] *= .5
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[2] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                # Contributions from the XC potential itself, restricted to
                # the AO rows p0:p1 of atom ``ia``.
                wv = numpy.empty_like(rho)
                wv[0] = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                veff[0, p0:p1] -= lib.dot(ao[1, :, p0:p1].T.copy(), aow)
                veff[1, p0:p1] -= lib.dot(ao[2, :, p0:p1].T.copy(), aow)
                veff[2, p0:p1] -= lib.dot(ao[3, :, p0:p1].T.copy(), aow)

                aow = numpy.einsum('npi,np->pi', ao[[XX, XY, XZ], :, p0:p1],
                                   wv[1:4])
                veff[0, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[YX, YY, YZ], :, p0:p1],
                                   wv[1:4])
                veff[1, p0:p1] -= lib.dot(aow.T, ao[0])
                aow = numpy.einsum('npi,np->pi', ao[[ZX, ZY, ZZ], :, p0:p1],
                                   wv[1:4])
                veff[2, p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        # Add the transpose over the last two axes.
        veff = veff + veff.transpose(0, 2, 1)

        if chkfile is None:
            h1aos.append(h1ao + veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao + veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile
Exemple #43
0
def get_jk(mols,
           dms,
           scripts=['ijkl,ji->kl'],
           intor='int2e_sph',
           aosym='s1',
           comp=None,
           hermi=0,
           shls_slice=None,
           verbose=logger.WARN,
           vhfopt=None):
    '''Compute J/K matrices for the given density matrix

    Args:
        mols : an instance of :class:`Mole` or a list of `Mole` objects

        dms : ndarray or list of ndarrays
            A density matrix or a list of density matrices

    Kwargs:
        hermi : int
            Whether the returned J (K) matrix is hermitian

            | 0 : no hermitian or symmetric
            | 1 : hermitian
            | 2 : anti-hermitian

        intor : str
            2-electron integral name.  See :func:`getints` for the complete
            list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry (default)
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        scripts : string or a list of strings
            Contraction description (following numpy.einsum convention) based on
            letters [ijkl].  Each script will be one-to-one applied to each
            entry of dms.  So it must have the same number of elements as the
            dms, len(scripts) == len(dms).  If other index letters are used,
            the first four letters of each script are renamed to i, j, k, l.
        shls_slice : 8-element list
            (ish_start, ish_end, jsh_start, jsh_end, ksh_start, ksh_end, lsh_start, lsh_end)
            The input is never modified.

    Returns:
        Depending on the number of density matrices, the function returns one
        J/K matrix or a list of J/K matrices (the same number of entries as the
        input dms).
        Each JK matrices may be a 2D array or 3D array if the AO integral
        has multiple components.

    Raises:
        RuntimeError: if a script still contains unsupported characters after
            its first four letters have been renamed to ijkl.

    Examples:

    >>> from pyscf import gto
    >>> mol = gto.M(atom='H 0 -.5 0; H 0 .5 0', basis='cc-pvdz')
    >>> nao = mol.nao_nr()
    >>> dm = numpy.random.random((nao,nao))
    >>> # Default, Coulomb matrix
    >>> vj = get_jk(mol, dm)
    >>> # Coulomb matrix with 8-fold permutation symmetry for AO integrals
    >>> vj = get_jk(mol, dm, 'ijkl,ji->kl', aosym='s8')
    >>> # Exchange matrix with 8-fold permutation symmetry for AO integrals
    >>> vk = get_jk(mol, dm, 'ijkl,jk->il', aosym='s8')
    >>> # Compute coulomb and exchange matrices together
    >>> vj, vk = get_jk(mol, (dm,dm), ('ijkl,ji->kl','ijkl,li->kj'), aosym='s8')
    >>> # Analytical gradients for coulomb matrix
    >>> j1 = get_jk(mol, dm, 'ijkl,lk->ij', intor='int2e_ip1_sph', aosym='s2kl', comp=3)

    >>> # contraction across two molecules
    >>> mol1 = gto.M(atom='He 2 0 0', basis='6-31g')
    >>> nao1 = mol1.nao_nr()
    >>> dm1 = numpy.random.random((nao1,nao1))
    >>> # Coulomb interaction between two molecules, note 4-fold symmetry can be applied
    >>> jcross = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', aosym='s4')
    >>> ecoul = numpy.einsum('ij,ij', jcross, dm1)
    >>> # Exchange interaction between two molecules, no symmetry can be used
    >>> kcross = get_jk((mol1,mol,mol,mol1), dm, scripts='ijkl,jk->il')
    >>> ex = numpy.einsum('ij,ji', kcross, dm1)

    >>> # Analytical gradients for coulomb matrix between two molecules
    >>> jcros1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3)
    >>> # Analytical gradients for coulomb interaction between 1s density and the other molecule
    >>> jpart1 = get_jk((mol1,mol1,mol,mol), dm, scripts='ijkl,lk->ij', intor='int2e_ip1_sph', comp=3,
    ...                 shls_slice=(0,1,0,1,0,mol.nbas,0,mol.nbas))
    '''
    if isinstance(mols, (tuple, list)):
        intor, comp = gto.moleintor._get_intor_and_comp(
            mols[0]._add_suffix(intor), comp)
        assert (len(mols) == 4)
        assert (mols[0].cart == mols[1].cart == mols[2].cart == mols[3].cart)
        if shls_slice is None:
            shls_slice = numpy.array([(0, mol.nbas) for mol in mols])
        else:
            # numpy.array copies, so the in-place offsets applied below never
            # leak back into an ndarray supplied by the caller.
            shls_slice = numpy.array(shls_slice).reshape(4, 2)

        # concatenate unique mols and build corresponding shls_slice
        mol_ids = [id(mol) for mol in mols]
        atm, bas, env = mols[0]._atm, mols[0]._bas, mols[0]._env
        bas_start = numpy.zeros(4, dtype=int)
        for m in range(1, 4):
            first = mol_ids.index(mol_ids[m])
            if first == m:  # the unique mol, not repeated in mols
                bas_start[m] = bas.shape[0]
                atm, bas, env = gto.conc_env(atm, bas, env, mols[m]._atm,
                                             mols[m]._bas, mols[m]._env)
            else:
                # Same Mole object as an earlier slot: reuse its shell offset.
                bas_start[m] = bas_start[first]
            shls_slice[m] += bas_start[m]
        shls_slice = shls_slice.flatten()
    else:
        intor, comp = gto.moleintor._get_intor_and_comp(
            mols._add_suffix(intor), comp)
        atm, bas, env = mols._atm, mols._bas, mols._env
        if shls_slice is None:
            shls_slice = (0, mols.nbas) * 4

    single_script = isinstance(scripts, str)
    if single_script:
        scripts = [scripts]
    # Check if letters other than ijkl were provided.  Every script is
    # inspected, not just the first four entries of the list.
    allowed_chars = 'ijkl,->as12'
    if set(''.join(scripts)).difference(allowed_chars):
        # Translate these letters to ijkl if possible
        scripts = [
            script.translate({
                ord(script[0]): 'i',
                ord(script[1]): 'j',
                ord(script[2]): 'k',
                ord(script[3]): 'l'
            }) for script in scripts
        ]
        if set(''.join(scripts)).difference(allowed_chars):
            raise RuntimeError('Scripts unsupported %s' % scripts)

    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = [dms]
    assert (len(scripts) == len(dms))

    #format scripts
    descript = []
    for script in scripts:
        dmsym, vsym = script.lower().split(',')[1].split('->')
        if vsym[:2] in ('a2', 's2', 's1'):
            # The output symmetry was given explicitly in the script.
            descript.append(dmsym + '->' + vsym)
        elif hermi == 0:
            descript.append(dmsym + '->s1' + vsym)
        else:
            descript.append(dmsym + '->s2' + vsym)

    vs = _vhf.direct_bindm(intor,
                           aosym,
                           descript,
                           dms,
                           comp,
                           atm,
                           bas,
                           env,
                           vhfopt=vhfopt,
                           shls_slice=shls_slice)
    if hermi != 0:
        # Complete each matrix in place according to ``hermi``.
        for v in vs:
            if v.ndim == 3:
                for vi in v:
                    lib.hermi_triu(vi, hermi, inplace=True)
            else:
                lib.hermi_triu(v, hermi, inplace=True)

    if single_script:
        vs = vs[0]
    return vs
Exemple #44
0
def pspace (fci, h1e, eri, norb, nelec, transformer, hdiag_det=None, hdiag_csf=None, npsp=200):
    ''' Note that getting pspace for npsp CSFs is substantially more costly than getting it for npsp determinants,
    until I write code than can evaluate Hamiltonian matrix elements of CSFs directly. On the other hand
    a pspace of determinants contains many redundant degrees of freedom for the same reason. Therefore I have
    reduced the default pspace size by a factor of 2.

    Args:
        fci : solver object providing ``make_hdiag``, ``make_hdiag_csf`` and ``verbose``
        h1e, eri : one- and two-electron integrals
        norb : number of orbitals (at most 63)
        nelec : electron count, unpacked with ``_unpack_nelec``
        transformer : object providing ``wfnsym``, ``confsym``, ``econf_csf_mask``,
            ``econf_det_mask`` and ``mat_det2csf_confspace``

    Kwargs:
        hdiag_det, hdiag_csf : precomputed Hamiltonian diagonals in the determinant
            and CSF bases; computed here when omitted
        npsp : requested number of CSFs in the pspace

    Returns:
        (csf_addr, h0) : addresses of the selected CSFs and the Hamiltonian
        matrix restricted to them

    Raises:
        NotImplementedError : if ``norb > 63``
    '''
    if norb > 63:
        raise NotImplementedError('norb > 63')

    t0 = (time.process_time (), time.time ())
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = np.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag_det is None:
        hdiag_det = fci.make_hdiag(h1e, eri, norb, nelec)
    if hdiag_csf is None:
        hdiag_csf = fci.make_hdiag_csf(h1e, eri, norb, nelec, hdiag_det=hdiag_det)
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int`` is the
    # type it used to refer to.
    csf_addr = np.arange (hdiag_csf.size, dtype=int)
    if transformer.wfnsym is None:
        ncsf_sym = hdiag_csf.size
    else:
        # Restrict the candidates to CSFs whose configuration matches wfnsym.
        idx_sym = transformer.confsym[transformer.econf_csf_mask] == transformer.wfnsym
        ncsf_sym = np.count_nonzero (idx_sym)
        csf_addr = csf_addr[idx_sym]
    if ncsf_sym > npsp:
        # Keep the npsp CSFs with the lowest diagonal elements; argsort is the
        # fallback when np.argpartition is not available.
        try:
            csf_addr = csf_addr[np.argpartition(hdiag_csf[csf_addr], npsp-1)[:npsp]]
        except AttributeError:
            csf_addr = csf_addr[np.argsort(hdiag_csf[csf_addr])[:npsp]]

    # Collect the configurations of the selected CSFs and all determinants
    # belonging to those configurations.
    econf_addr = np.unique (transformer.econf_csf_mask[csf_addr])
    det_addr = np.concatenate ([np.nonzero (transformer.econf_det_mask == conf)[0]
        for conf in econf_addr])
    lib.logger.debug (fci, ("csf.pspace: Lowest-energy %s CSFs correspond to %s configurations"
        " which are spanned by %s determinants"), npsp, econf_addr.size, det_addr.size)

    # Split each determinant address into its alpha and beta string addresses.
    addra, addrb = divmod(det_addr, nb)
    stra = cistring.addrs2str(norb, neleca, addra)
    strb = cistring.addrs2str(norb, nelecb, addrb)
    npsp_det = len(det_addr)
    h0 = np.zeros((npsp_det,npsp_det))
    h1e_ab = unpack_h1e_ab (h1e)
    h1e_a = np.ascontiguousarray(h1e_ab[0])
    h1e_b = np.ascontiguousarray(h1e_ab[1])
    g2e = ao2mo.restore(1, eri, norb)
    # The same two-electron array is passed for all three spin blocks.
    g2e_ab = g2e_bb = g2e_aa = g2e
    _debug_g2e (fci, g2e, eri, norb) # Exploring g2e nan bug; remove later?
    t0 = lib.logger.timer (fci, "csf.pspace: index manipulation", *t0)
    libfci.FCIpspace_h0tril_uhf(h0.ctypes.data_as(ctypes.c_void_p),
                                h1e_a.ctypes.data_as(ctypes.c_void_p),
                                h1e_b.ctypes.data_as(ctypes.c_void_p),
                                g2e_aa.ctypes.data_as(ctypes.c_void_p),
                                g2e_ab.ctypes.data_as(ctypes.c_void_p),
                                g2e_bb.ctypes.data_as(ctypes.c_void_p),
                                stra.ctypes.data_as(ctypes.c_void_p),
                                strb.ctypes.data_as(ctypes.c_void_p),
                                ctypes.c_int(norb), ctypes.c_int(npsp_det))
    t0 = lib.logger.timer (fci, "csf.pspace: pspace Hamiltonian in determinant basis", *t0)

    # Overwrite the diagonal with the precomputed determinant diagonal, then
    # complete the matrix from the triangle filled by the C routine.
    for i in range(npsp_det):
        h0[i,i] = hdiag_det[det_addr[i]]
    h0 = lib.hermi_triu(h0)

    # Debug only: eigenvalues before the basis change, for comparison below.
    try:
        if fci.verbose >= lib.logger.DEBUG: evals_before = scipy.linalg.eigh (h0)[0]
    except ValueError as e:
        lib.logger.debug (fci, ("ERROR: h0 has {} infs, {} nans; h1e_a has {} infs, {} nans; "
            "h1e_b has {} infs, {} nans; g2e has {} infs, {} nans, norb = {}, npsp_det = {}").format (
            np.count_nonzero (np.isinf (h0)), np.count_nonzero (np.isnan (h0)),
            np.count_nonzero (np.isinf (h1e_a)), np.count_nonzero (np.isnan (h1e_a)),
            np.count_nonzero (np.isinf (h1e_b)), np.count_nonzero (np.isnan (h1e_b)),
            np.count_nonzero (np.isinf (g2e)), np.count_nonzero (np.isnan (g2e)),
            norb, npsp_det))
        evals_before = np.zeros (npsp_det)

    h0, csf_addr = transformer.mat_det2csf_confspace (h0, econf_addr)
    t0 = lib.logger.timer (fci, "csf.pspace: transform pspace Hamiltonian into CSF basis", *t0)

    if fci.verbose >= lib.logger.DEBUG:
        lib.logger.debug2 (fci, "csf.pspace: eigenvalues of h0 before transformation %s", evals_before)
        evals_after = scipy.linalg.eigh (h0)[0]
        lib.logger.debug2 (fci, "csf.pspace: eigenvalues of h0 after transformation %s", evals_after)
        # Match every post-transformation eigenvalue to its closest
        # pre-transformation eigenvalue.
        idx = [np.argmin (np.abs (evals_before - ev)) for ev in evals_after]
        resid = evals_after - evals_before[idx]
        lib.logger.debug2 (fci, "csf.pspace: best h0 eigenvalue matching differences after transformation: %s", resid)
        lib.logger.debug (fci, "csf.pspace: if the transformation of h0 worked the following number will be zero: %s", np.max (np.abs(resid)))

    # We got extra CSFs from building the configurations most of the time.
    if csf_addr.size > npsp:
        try:
            csf_addr_2 = np.argpartition(np.diag (h0), npsp-1)[:npsp]
        except AttributeError:
            csf_addr_2 = np.argsort(np.diag (h0))[:npsp]
        csf_addr = csf_addr[csf_addr_2]
        h0 = h0[np.ix_(csf_addr_2,csf_addr_2)]
    npsp_csf = csf_addr.size
    lib.logger.debug (fci, "csf_solver.pspace: asked for %s-CSF pspace; found %s CSFs", npsp, npsp_csf)

    t0 = lib.logger.timer (fci, "csf.pspace wrapup", *t0)
    return csf_addr, h0
    def test_direct_jk_s2(self):
        '''Check the J/K matrices produced by ``runjks2`` / ``runjk`` for
        several dot-function and contraction-kernel combinations against
        reference matrices built from ``rhf._eri``.'''
        def expect_close(reference, computed):
            self.assertTrue(numpy.allclose(reference, computed))

        numpy.random.seed(15)

        # Symmetric random density matrix shared by every comparison.
        dm1 = numpy.random.random((nao,nao))
        dm1 = dm1 + dm1.T

        # 8-fold kernels vs. the in-core builder.
        vj0, vk0 = scf._vhf.incore(rhf._eri, dm1, 1)
        j_mat, k_mat = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs8',
                               'CVHFnrs8_ji_s2kl', 'CVHFnrs8_jk_s2il')
        expect_close(vj0, j_mat)
        expect_close(vk0, k_mat)

        # From here on the references are explicit einsum contractions.
        eri1 = ao2mo.restore(1, rhf._eri, nao)
        vj0 = numpy.einsum('ijkl,kl->ij', eri1, dm1)
        vk0 = numpy.einsum('ijkl,jk->il', eri1, dm1)
        first, second = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                                'CVHFnrs4_ji_s2kl', 'CVHFnrs4_jk_s2il')
        expect_close(vj0, first)
        expect_close(vk0, second)

        # Both 4-fold exchange kernels must give the same K.
        first, second = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                                'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il')
        expect_close(vk0, first)
        expect_close(vk0, second)

        vk0 = numpy.einsum('ijkl,jk->il', eri1, dm1)
        results = runjks2(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs4',
                          'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il',
                          'CVHFnrs4_li_s2kj', 'CVHFnrs4_jk_s2il')
        for pos in range(4):
            expect_close(vk0, results[pos])

        # Two J kernels followed by two K kernels for each remaining
        # integral symmetry.
        vj0 = numpy.einsum('ijkl,kl->ij', eri1, dm1)
        vk0 = numpy.einsum('ijkl,jk->il', eri1, dm1)
        kernel_table = (
            ('CVHFdot_nrs2kl',
             'CVHFnrs2kl_ji_s2kl', 'CVHFnrs2kl_lk_s2ij',
             'CVHFnrs2kl_jk_s2il', 'CVHFnrs2kl_li_s2kj'),
            ('CVHFdot_nrs2ij',
             'CVHFnrs2ij_ji_s2kl', 'CVHFnrs2ij_lk_s2ij',
             'CVHFnrs2ij_jk_s2il', 'CVHFnrs2ij_li_s2kj'),
            ('CVHFdot_nrs1',
             'CVHFnrs1_ji_s2kl', 'CVHFnrs1_lk_s2ij',
             'CVHFnrs1_jk_s2il', 'CVHFnrs1_li_s2kj'),
        )
        for kernel_names in kernel_table:
            results = runjks2(dm1, 1, 'cint2e_sph', *kernel_names)
            expect_close(vj0, results[0])
            expect_close(vj0, results[1])
            expect_close(vk0, results[2])
            expect_close(vk0, results[3])

        # runjk output is passed through hermi_triu before the comparison.
        vj0, vk0 = scf._vhf.incore(rhf._eri, dm1, 1)
        j_mat, k_mat = runjk(dm1, 1, 'cint2e_sph', 'CVHFdot_nrs8',
                             'CVHFnrs8_ji_s2kl', 'CVHFnrs8_jk_s2il')
        j_mat = lib.hermi_triu(j_mat, 1)
        k_mat = lib.hermi_triu(k_mat, 1)
        expect_close(vj0, j_mat)
        expect_close(vk0, k_mat)
Exemple #46
0
def incore(eri, dms, hermi=0, with_j=True, with_k=True):
    '''Build J and K matrices from in-memory two-electron integrals.

    Args:
        eri : ndarray of dtype double, either 2D with ``npair * npair``
            elements (4-fold symmetry) or 1D with ``npair * (npair+1) // 2``
            elements (8-fold symmetry), where ``npair = nao * (nao+1) // 2``.
        dms : a density matrix or a stack of density matrices.  The input is
            converted to a C-contiguous float64 array.

    Kwargs:
        hermi : int
            Symmetry flag for K.  With 8-fold integrals ``hermi == 1``
            selects the 's2' exchange kernel; for any non-zero value the K
            matrices are completed with ``lib.hermi_triu(vk, hermi)``.
        with_j, with_k : bool
            Whether J / K is computed; the skipped one is returned as None.

    Returns:
        ``(vj, vk)``, each with the shape of ``dms`` (or None).

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    '''
    assert (eri.dtype == numpy.double)
    eri = numpy.asarray(eri, order='C')
    # The C kernels receive raw pointers and read the density matrices as
    # contiguous doubles.  Coerce the dtype so that integer or
    # single-precision input is converted instead of being misinterpreted.
    dms = numpy.asarray(dms, order='C', dtype=numpy.double)
    dms_shape = dms.shape
    nao = dms_shape[-1]

    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    vj = vk = None
    if with_j:
        vj = numpy.zeros((n_dm, nao, nao))
    if with_k:
        vk = numpy.zeros((n_dm, nao, nao))

    # Parallel lists: one (input pointer, output pointer, kernel) triple per
    # contraction handed to the C driver.
    dmsptr = []
    vjkptr = []
    fjkptr = []

    npair = nao * (nao + 1) // 2
    if eri.ndim == 2 and npair * npair == eri.size:  # 4-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        if with_j:
            # 'ijkl,kl->ij'
            fvj = _fpointer('CVHFics4_kl_s2ij')
            # or
            ## 'ijkl,ij->kl'
            #fvj = _fpointer('CVHFics4_ij_s2kl')
            for i, dm in enumerate(dms):
                dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                vjkptr.append(vj[i].ctypes.data_as(ctypes.c_void_p))
                fjkptr.append(fvj)
        if with_k:
            # 'ijkl,il->jk'
            fvk = _fpointer('CVHFics4_il_s1jk')
            # or
            ## 'ijkl,jk->il'
            #fvk = _fpointer('CVHFics4_jk_s1il')
            for i, dm in enumerate(dms):
                dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                vjkptr.append(vk[i].ctypes.data_as(ctypes.c_void_p))
                fjkptr.append(fvk)

    elif eri.ndim == 1 and npair * (npair +
                                    1) // 2 == eri.size:  # 8-fold symmetry eri
        fdrv = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        if with_j:
            fvj = _fpointer('CVHFics8_tridm_vj')
            # The J kernel takes packed lower-triangular density matrices:
            # pack dm + dm^T and halve the diagonal entries so they are
            # counted once.
            tridms = lib.pack_tril(lib.hermi_sum(dms, axes=(0, 2, 1)))
            idx = numpy.arange(nao)
            tridms[:, idx * (idx + 1) // 2 + idx] *= .5
            for i, tridm in enumerate(tridms):
                dmsptr.append(tridm.ctypes.data_as(ctypes.c_void_p))
                vjkptr.append(vj[i].ctypes.data_as(ctypes.c_void_p))
                fjkptr.append(fvj)
        if with_k:
            if hermi == 1:
                fvk = _fpointer('CVHFics8_jk_s2il')
            else:
                fvk = _fpointer('CVHFics8_jk_s1il')
            for i, dm in enumerate(dms):
                dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                vjkptr.append(vk[i].ctypes.data_as(ctypes.c_void_p))
                fjkptr.append(fvk)
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dms_shape, eri.shape))

    n_ops = len(dmsptr)
    fdrv(eri.ctypes.data_as(ctypes.c_void_p),
         (ctypes.c_void_p * n_ops)(*dmsptr),
         (ctypes.c_void_p * n_ops)(*vjkptr), ctypes.c_int(n_ops),
         ctypes.c_int(nao), (ctypes.c_void_p * n_ops)(*fjkptr))

    if with_j:
        # J is always completed as a hermitian matrix.
        for i in range(n_dm):
            lib.hermi_triu(vj[i], 1, inplace=True)
        vj = vj.reshape(dms_shape)
    if with_k:
        if hermi != 0:
            for i in range(n_dm):
                lib.hermi_triu(vk[i], hermi, inplace=True)
        vk = vk.reshape(dms_shape)
    return vj, vk
Exemple #47
0
def direct(dms, atm, bas, env, vhfopt=None, hermi=0, cart=False):
    '''Compute J and K matrices with the ``CVHFnr_direct_drv`` C driver.

    Args:
        dms : a 2D density matrix or a sequence of density matrices.  The
            input is converted to a C-contiguous float64 array.
        atm, bas, env : integral environment arrays; converted to int32,
            int32 and double respectively before being passed to C.

    Kwargs:
        vhfopt : optional optimizer object.  When given, its ``_intor``,
            ``_cintopt`` and ``_this`` are used and ``set_dm`` is called
            with the density matrices.
        hermi : int
            ``1`` selects the 's2' exchange kernel; for any non-zero value
            the K matrices are completed with ``lib.hermi_triu``.
        cart : bool
            Only used when ``vhfopt`` is None: selects ``'int2e_cart'``
            instead of ``'int2e_sph'``.

    Returns:
        Array ``vjk`` with ``vjk[0]`` the J and ``vjk[1]`` the K matrices;
        shape ``(2, nao, nao)`` for a single density matrix, otherwise
        ``(2, n_dm, nao, nao)``.
    '''
    c_atm = numpy.asarray(atm, dtype=numpy.int32, order='C')
    c_bas = numpy.asarray(bas, dtype=numpy.int32, order='C')
    c_env = numpy.asarray(env, dtype=numpy.double, order='C')
    natm = ctypes.c_int(c_atm.shape[0])
    nbas = ctypes.c_int(c_bas.shape[0])

    if isinstance(dms, numpy.ndarray) and dms.ndim == 2:
        dms = dms[numpy.newaxis,:,:]
    n_dm = len(dms)
    nao = dms[0].shape[0]
    # The density matrices are handed to C as raw pointers and read as
    # contiguous doubles; coerce the dtype so that integer or
    # single-precision input is converted instead of being misinterpreted.
    dms = numpy.asarray(dms, order='C', dtype=numpy.double)

    if vhfopt is None:
        if cart:
            intor = 'int2e_cart'
        else:
            intor = 'int2e_sph'
        cintopt = make_cintopt(c_atm, c_bas, c_env, intor)
        cvhfopt = lib.c_null_ptr()
    else:
        vhfopt.set_dm(dms, atm, bas, env)
        cvhfopt = vhfopt._this
        cintopt = vhfopt._cintopt
        intor = vhfopt._intor
    cintor = _fpointer(intor)

    fdrv = getattr(libcvhf, 'CVHFnr_direct_drv')
    fdot = _fpointer('CVHFdot_nrs8')
    fvj = _fpointer('CVHFnrs8_ji_s2kl')
    if hermi == 1:
        fvk = _fpointer('CVHFnrs8_li_s2kj')
    else:
        fvk = _fpointer('CVHFnrs8_li_s1kj')
    # Pointer tables of length 2*n_dm: the first n_dm entries describe the
    # J contractions, the last n_dm entries the K contractions.
    vjk = numpy.empty((2,n_dm,nao,nao))
    fjk = (ctypes.c_void_p*(2*n_dm))()
    dmsptr = (ctypes.c_void_p*(2*n_dm))()
    vjkptr = (ctypes.c_void_p*(2*n_dm))()
    for i in range(n_dm):
        dmsptr[i] = dms[i].ctypes.data_as(ctypes.c_void_p)
        vjkptr[i] = vjk[0,i].ctypes.data_as(ctypes.c_void_p)
        fjk[i] = fvj
    for i in range(n_dm):
        dmsptr[n_dm+i] = dms[i].ctypes.data_as(ctypes.c_void_p)
        vjkptr[n_dm+i] = vjk[1,i].ctypes.data_as(ctypes.c_void_p)
        fjk[n_dm+i] = fvk
    # All four indices run over every shell.
    shls_slice = (ctypes.c_int*8)(*([0, c_bas.shape[0]]*4))
    ao_loc = make_loc(bas, intor)

    fdrv(cintor, fdot, fjk, dmsptr, vjkptr,
         ctypes.c_int(n_dm*2), ctypes.c_int(1),
         shls_slice, ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, cvhfopt,
         c_atm.ctypes.data_as(ctypes.c_void_p), natm,
         c_bas.ctypes.data_as(ctypes.c_void_p), nbas,
         c_env.ctypes.data_as(ctypes.c_void_p))

    # vj must be symmetric
    for idm in range(n_dm):
        vjk[0,idm] = lib.hermi_triu(vjk[0,idm], 1)
    if hermi != 0: # vk depends
        for idm in range(n_dm):
            vjk[1,idm] = lib.hermi_triu(vjk[1,idm], hermi)
    if n_dm == 1:
        vjk = vjk.reshape(2,nao,nao)
    return vjk
Exemple #48
0
def make_h1(mf, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=logger.WARN):
    """Build, per atom, the first-derivative matrices ``h1ao + veff``.

    For every atom index in ``atmlst`` a 3-component (x, y, z) AO matrix is
    assembled from three pieces:

    * a one-electron part from the ``cint1e_iprinv_sph`` integrals (origin
      placed on the atom, scaled by minus the atomic charge) plus the
      atom's AO block of ``-(cint1e_ipkin_sph + cint1e_ipnuc_sph)``;
    * a two-electron part from ``cint2e_ip1_sph`` contracted with the
      density matrix (Coulomb only, or Coulomb minus ``hyb/2`` times
      exchange when the hybrid coefficient is non-negligible);
    * an exchange-correlation part integrated on ``mf.grids`` for LDA or
      GGA functionals.

    Both the one-electron part and ``veff`` are symmetrised by adding their
    transpose over the two AO axes before being summed.

    Args:
        mf: mean-field object; ``mf.mol``, ``mf._numint``, ``mf.xc`` and
            ``mf.grids`` are read.
        mo_coeff: 2-D array of MO coefficients; its first dimension is
            used as the number of AOs.
        mo_occ: occupation numbers; the columns of ``mo_coeff`` with
            ``mo_occ > 0`` build the density matrix ``2 * C C^T``
            (NOTE(review): this presumes real, doubly occupied orbitals).
        chkfile: if not None, each atom's result is saved there under the
            key ``'scf_h1ao/<ia>'`` instead of being kept in memory.
        atmlst: iterable of atom indices; defaults to all atoms of the
            molecule.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        A list with one array per atom of ``atmlst`` when ``chkfile`` is
        None, otherwise ``chkfile`` itself.

    Raises:
        NotImplementedError: if the functional type is neither LDA nor GGA.
    """
    mol = mf.mol
    if atmlst is None:
        atmlst = range(mol.natm)

    nao = mo_coeff.shape[0]
    mocc = mo_coeff[:,mo_occ>0]
    dm0 = numpy.dot(mocc, mocc.T) * 2

    # Work on a shallow copy so that switching the XC backend does not
    # modify the numint object owned by mf.
    ni = copy.copy(mf._numint)
    if USE_XCFUN:
        try:
            ni.libxc = dft.xcfun
            xctype = ni._xc_type(mf.xc)
        except (ImportError, KeyError, NotImplementedError):
            # Fall back to libxc when xcfun cannot handle this functional.
            ni.libxc = dft.libxc
            xctype = ni._xc_type(mf.xc)
    else:
        xctype = ni._xc_type(mf.xc)
    grids = mf.grids
    hyb = ni.libxc.hybrid_coeff(mf.xc)
    max_memory = 4000

    h1a =-(mol.intor('cint1e_ipkin_sph', comp=3) +
           mol.intor('cint1e_ipnuc_sph', comp=3))

    def get_wv(rho, rho1, weight, vxc, fxc):
        # Weighted GGA response factors for one perturbation direction:
        # row 0 multiplies the AO values, rows 1..3 the AO gradients.
        vgamma = vxc[1]
        frr, frg, fgg = fxc[:3]
        ngrid = weight.size
        sigma1 = numpy.einsum('xi,xi->i', rho[1:], rho1[1:])
        wv = numpy.empty((4,ngrid))
        wv[0]  = frr * rho1[0]
        wv[0] += frg * sigma1 * 2
        wv[1:]  = (fgg * sigma1 * 4 + frg * rho1[0] * 2) * rho[1:]
        wv[1:] += vgamma * rho1[1:] * 2
        wv *= weight
        return wv

    offsetdic = mol.offset_nr_by_atom()
    h1aos = []
    for ia in atmlst:
        shl0, shl1, p0, p1 = offsetdic[ia]

        mol.set_rinv_origin(mol.atom_coord(ia))
        h1ao = -mol.atom_charge(ia) * mol.intor('cint1e_iprinv_sph', comp=3)
        h1ao[:,p0:p1] += h1a[:,p0:p1]
        h1ao = h1ao + h1ao.transpose(0,2,1)

        # Restrict the first shell index to the shells of atom ia.
        shls_slice = (shl0, shl1) + (0, mol.nbas)*3
        if abs(hyb) > 1e-10:
            vj1, vj2, vk1, vk2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij', 'li->s1kj', 'jk->s1il'),
                                      (-dm0[:,p0:p1], -dm0, -dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                # Return value is ignored: this relies on hermi_triu
                # filling vj1[i] in place.
                lib.hermi_triu(vj1[i], 1)
            veff = vj1 - hyb*.5*vk1
            veff[:,p0:p1] += vj2 - hyb*.5*vk2
        else:
            vj1, vj2 = \
                    _vhf.direct_bindm('cint2e_ip1_sph', 's2kl',
                                      ('ji->s2kl', 'lk->s1ij'),
                                      (-dm0[:,p0:p1], -dm0),
                                      3, mol._atm, mol._bas, mol._env,
                                      shls_slice=shls_slice)
            for i in range(3):
                lib.hermi_triu(vj1[i], 1)
            veff = vj1
            veff[:,p0:p1] += vj2

        # Contiguous copy of this atom's density-matrix columns.  It does not
        # depend on the grid block, so build it once per atom rather than
        # once (or several times) per block.
        dm0_atm = dm0[:,p0:p1].copy()

        if xctype == 'LDA':
            ao_deriv = 1
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[0], mo_coeff, mo_occ, mask, 'LDA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho = vxc[0]
                frr = fxc[0]
                half = lib.dot(ao[0], dm0_atm)
                rho1 = numpy.einsum('xpi,pi->xp', ao[1:,:,p0:p1], half)
                aow = numpy.einsum('pi,xp->xpi', ao[0], weight*frr*rho1)
                aow1 = numpy.einsum('xpi,p->xpi', ao[1:,:,p0:p1], weight*vrho)
                aow[:,:,p0:p1] += aow1
                for x in range(3):
                    veff[x] += lib.dot(-aow[x].T, ao[0])
                # Drop references to the large temporaries before the next block.
                half = aow = aow1 = None

        elif xctype == 'GGA':
            # For each Cartesian direction: index of the first derivative
            # followed by the indices of the matching second derivatives.
            deriv_comps = ((1, XX, XY, XZ),
                           (2, YX, YY, YZ),
                           (3, ZX, ZY, ZZ))
            ao_deriv = 2
            for ao, mask, weight, coords \
                    in ni.block_loop(mol, grids, nao, ao_deriv, max_memory, ni.non0tab):
                rho = ni.eval_rho2(mol, ao[:4], mo_coeff, mo_occ, mask, 'GGA')
                vxc, fxc = ni.eval_xc(mf.xc, rho, 0, deriv=2)[1:3]
                vrho, vgamma = vxc[:2]
                # (d_X \nabla_x mu) nu DM_{mu,nu}
                half = lib.dot(ao[0], dm0_atm)
                rho1 = [numpy.einsum('xpi,pi->xp', ao[list(comps),:,p0:p1], half)
                        for comps in deriv_comps]
                # (d_X mu) (\nabla_x nu) DM_{mu,nu}
                for k in (1, 2, 3):
                    half = lib.dot(ao[k], dm0_atm)
                    for x in range(3):
                        rho1[x][k] += numpy.einsum('pi,pi->p', ao[x+1,:,p0:p1], half)

                for x in range(3):
                    wv = get_wv(rho, rho1[x], weight, vxc, fxc)
                    # Halve the density row because transpose_sum below adds
                    # the matrix to its own transpose.
                    wv[0] *= .5
                    aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                    veff[x] -= lib.transpose_sum(lib.dot(aow.T, ao[0]))

                wv = numpy.empty_like(rho)
                wv[0]  = weight * vrho
                wv[1:] = rho[1:] * (weight * vgamma * 2)
                aow = numpy.einsum('npi,np->pi', ao[:4], wv)
                for x in range(3):
                    veff[x,p0:p1] -= lib.dot(ao[x+1,:,p0:p1].T.copy(), aow)

                for x, comps in enumerate(deriv_comps):
                    aow = numpy.einsum('npi,np->pi', ao[list(comps[1:]),:,p0:p1], wv[1:4])
                    veff[x,p0:p1] -= lib.dot(aow.T, ao[0])
        else:
            raise NotImplementedError('meta-GGA')

        veff = veff + veff.transpose(0,2,1)

        if chkfile is None:
            h1aos.append(h1ao+veff)
        else:
            key = 'scf_h1ao/%d' % ia
            lib.chkfile.save(chkfile, key, h1ao+veff)
    if chkfile is None:
        return h1aos
    else:
        return chkfile