Example #1
0
def _guess_shell_ranges(mol, buflen, aosym):
    """Split the shells of ``mol`` into ranges with ``balance_partition``.

    The cumulative-cost array handed to ``balance_partition`` is derived from
    the AO offsets returned by ``mol.ao_loc_nr()``:

    * ``loc * (loc + 1) // 2`` when ``aosym`` contains ``'s2'``;
    * ``loc * nao`` otherwise, where ``nao`` is the last offset.

    ``buflen`` is forwarded as the second argument of ``balance_partition``
    and the partition it produces is returned unchanged.
    """
    from pyscf.ao2mo.outcore import balance_partition
    shell_offsets = mol.ao_loc_nr()
    if 's2' in aosym:
        # Triangular (packed) count up to each shell boundary.
        cumulative_cost = shell_offsets * (shell_offsets + 1) // 2
    else:
        # Rectangular count: each offset times the total number of AOs.
        cumulative_cost = shell_offsets * shell_offsets[-1]
    return balance_partition(cumulative_cost, buflen)
Example #2
0
def _guess_shell_ranges(mol, buflen, aosym):
    """Return the ``balance_partition`` of the shells of ``mol``.

    The AO offsets come from ``mol.ao_loc_nr()``.  Without ``'s2'`` in
    ``aosym`` the cost array is ``offsets * nao`` (``nao`` being the last
    offset); with it, the cost array is ``offsets * (offsets + 1) // 2``.
    ``buflen`` is passed through to ``balance_partition`` as given.
    """
    from pyscf.ao2mo.outcore import balance_partition
    offsets = mol.ao_loc_nr()
    if 's2' not in aosym:
        total_ao = offsets[-1]
        return balance_partition(offsets * total_ao, buflen)
    # Symmetric case: use the packed lower-triangle count.
    return balance_partition(offsets * (offsets + 1) // 2, buflen)
Example #3
0
def get_int3c_mo(mol,
                 auxmol,
                 mo_coeff,
                 compact=getattr(__config__, 'df_df_DF_ao2mo_compact', True),
                 max_memory=None):
    ''' Transform the three-centre integrals to the MO basis:
    (P|uv) c_ui c_vj -> (P|ij)

    Args:
        mol: gto.Mole
        auxmol: gto.Mole carrying the auxiliary basis
        mo_coeff: ndarray, list, or tuple of MO coefficients.
            A single 2D ndarray is used for both AO dimensions; otherwise
            mo_coeff[0] and mo_coeff[1] are used for the first and second
            AO dimension respectively.

    Kwargs:
        compact: bool
            Forwarded to _conc_mos. If true, only the unique elements along
            the two MO dimensions are returned; has no effect when the two
            orbital sets differ.
        max_memory: int
            Memory ceiling in MB; defaults to mol.max_memory

    Returns:
        int3c: ndarray of shape (naux, nmo0, nmo1) or (naux, nmo*(nmo+1)//2) '''

    n_ao = mol.nao
    n_aux = auxmol.nao
    n_shell = mol.nbas
    n_ao_pair = n_ao * (n_ao + 1) // 2
    if max_memory is None:
        max_memory = mol.max_memory

    # One orbital set for both indices, or a separate set for each
    single_set = isinstance(mo_coeff, np.ndarray) and mo_coeff.ndim == 2
    if single_set:
        bra_mo = ket_mo = mo_coeff
    else:
        bra_mo, ket_mo = mo_coeff[0], mo_coeff[1]
    mosym, nmo_pair, mo_conc, mo_slice = _conc_mos(bra_mo,
                                                   ket_mo,
                                                   compact=compact)

    # (P|uv) -> (P|ij), one block of auxiliary shells at a time
    fetch_ao_block = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    mo_ints = np.zeros((n_aux, nmo_pair), dtype=bra_mo.dtype)
    max_memory -= lib.current_memory()[0]
    # Block length is clamped to the interval [20, 240]
    blksize = int(min(max(max_memory * 1e6 / 8 / (n_ao_pair * 2), 20), 240))
    aux_loc = auxmol.ao_loc
    for shl0, shl1, _ in balance_partition(aux_loc, blksize):
        ao_block = fetch_ao_block((0, n_shell, 0, n_shell, shl0, shl1))  # (uv|P)
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        # Per the original author's note the block comes back f-contiguous,
        # so a plain transpose yields the (P|uv) layout nr_e2 is given here.
        ao_block = ao_block.T
        target = mo_ints[p0:p1]
        mo_ints[p0:p1] = _ao2mo.nr_e2(ao_block,
                                      mo_conc,
                                      mo_slice,
                                      aosym='s2',
                                      mosym=mosym,
                                      out=target)
        ao_block = None

    # Unpacked ('s1') results are returned as a rank-3 array
    if 's1' in mosym:
        mo_ints = mo_ints.reshape(n_aux, bra_mo.shape[-1], ket_mo.shape[-1])
    return mo_ints
Example #4
0
def get_jk(mf_grad, mol=None, dm=None, hermi=0, with_j=True, with_k=True):
    '''Density-fitted Coulomb (vj) and exchange (vk) gradient intermediates.

    Args:
        mf_grad: gradients object. ``mf_grad.base`` supplies ``make_rdm1``,
            ``with_df``, ``mo_coeff`` and ``mo_occ``; ``mf_grad.max_memory``
            and ``mf_grad.auxbasis_response`` are read as well.
        mol: molecule; defaults to ``mf_grad.mol``.
        dm: ignored in this version -- it is always replaced by
            ``mf_grad.base.make_rdm1()`` (see the TODO below).
        hermi: unused.
        with_j: unused; vj is always computed.
        with_k: if False, only vj is built and ``(vj, None)`` is returned.

    Returns:
        (vj, vk): arrays of shape ``dm.shape[:-2] + (3,) + dm.shape[-2:]``
        (sign already flipped). When ``mf_grad.auxbasis_response`` is true
        each is a tagged array whose ``aux`` attribute holds the auxiliary
        basis contribution summed per atom of ``auxmol`` (shape (natm, 3)).
    '''
    if mol is None: mol = mf_grad.mol
    #if dm is None: dm = mf_grad.base.make_rdm1()
    #TODO: dm has to be the SCF density matrix in this version.  dm should be
    # extended to any 1-particle density matrix
    dm = mf_grad.base.make_rdm1()

    with_df = mf_grad.base.with_df
    auxmol = with_df.auxmol
    if auxmol is None:
        auxmol = df.addons.make_auxmol(with_df.mol, with_df.auxbasis)
    ao_loc = mol.ao_loc
    nbas = mol.nbas
    nauxbas = auxmol.nbas

    get_int3c_s1 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's1')
    get_int3c_s2 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    get_int3c_ip1 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip1', 's1')
    get_int3c_ip2 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')

    nao = mol.nao
    naux = auxmol.nao
    dms = numpy.asarray(dm)
    out_shape = dms.shape[:-2] + (3, ) + dms.shape[-2:]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    auxslices = auxmol.aoslice_by_atom()
    aux_loc = auxmol.ao_loc
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    # Auxiliary-shell block length, clamped to [20, min(naux, 240)]
    blksize = int(min(max(max_memory * .5e6 / 8 / (nao**2 * 3), 20), naux,
                      240))
    ao_ranges = balance_partition(aux_loc, blksize)

    if not with_k:
        # Fold dm + dm^T into a packed lower triangle (diagonal counted once)
        idx = numpy.arange(nao)
        dm_tril = dms + dms.transpose(0, 2, 1)
        dm_tril[:, idx, idx] *= .5
        dm_tril = lib.pack_tril(dm_tril)

        # (i,j|P)
        rhoj = numpy.empty((nset, naux))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_s2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            rhoj[:, p0:p1] = numpy.einsum('wp,nw->np', int3c, dm_tril)
            int3c = None

        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1')
        # assume_a='pos' replaces the sym_pos=True keyword, which has been
        # removed from SciPy.
        rhoj = scipy.linalg.solve(int2c, rhoj.T, assume_a='pos').T
        int2c = None

        # (d/dX i,j|P)
        vj = numpy.zeros((nset, 3, nao, nao))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            vj += numpy.einsum('xijp,np->nxij', int3c, rhoj[:, p0:p1])
            int3c = None

        if mf_grad.auxbasis_response:
            # (i,j|d/dX P)
            vjaux = numpy.empty((3, naux))
            for shl0, shl1, nL in ao_ranges:
                int3c = get_int3c_ip2(
                    (0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
                p0, p1 = aux_loc[shl0], aux_loc[shl1]
                vjaux[:, p0:p1] = numpy.einsum('xwp,mw,np->xp', int3c, dm_tril,
                                               rhoj[:, p0:p1])
                int3c = None

            # (d/dX P|Q)
            int2c_e1 = auxmol.intor('int2c2e_ip1', aosym='s1')
            vjaux -= numpy.einsum('xpq,mp,nq->xp', int2c_e1, rhoj, rhoj)

            # Sum the auxiliary functions belonging to each atom
            vjaux = [
                -vjaux[:, p0:p1].sum(axis=1) for p0, p1 in auxslices[:, 2:]
            ]
            vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        else:
            vj = -vj.reshape(out_shape)
        return vj, None

    mo_coeff = mf_grad.base.mo_coeff
    mo_occ = mf_grad.base.mo_occ
    nmo = mo_occ.shape[-1]
    if isinstance(mf_grad.base, scf.rohf.ROHF):
        # Treat ROHF as two sets sharing the same orbitals: one with every
        # occupied orbital, one with the doubly occupied orbitals only.
        mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
        mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
        mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
        assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
        mo_occ = numpy.vstack((mo_occa, mo_occb))

    mo_coeff = numpy.asarray(mo_coeff).reshape(-1, nao, nmo)
    mo_occ = numpy.asarray(mo_occ).reshape(-1, nmo)
    rhoj = numpy.zeros((nset, naux))
    f_rhok = lib.H5TmpFile()
    # Occupied orbitals of each set, scaled by sqrt(occupation)
    orbo = []
    for i in range(nset):
        c = numpy.einsum('pi,i->pi', mo_coeff[i][:, mo_occ[i] > 0],
                         numpy.sqrt(mo_occ[i][mo_occ[i] > 0]))
        orbo.append(c)

    # (P|Q)
    int2c = scipy.linalg.cho_factor(auxmol.intor('int2c2e', aosym='s1'))

    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = max_memory * .5e6 / 8 / (naux * nao)
    mol_ao_ranges = balance_partition(ao_loc, blksize)
    nsteps = len(mol_ao_ranges)
    for istep, (shl0, shl1, nd) in enumerate(mol_ao_ranges):
        int3c = get_int3c_s1((0, nbas, shl0, shl1, 0, nauxbas))
        p0, p1 = ao_loc[shl0], ao_loc[shl1]
        rhoj += numpy.einsum('nlk,klp->np', dms[:, p0:p1], int3c)
        for i in range(nset):
            v = lib.einsum('ko,klp->plo', orbo[i], int3c)
            v = scipy.linalg.cho_solve(int2c, v.reshape(naux, -1))
            # Cache the half-transformed, fitted block on disk
            f_rhok['%s/%s' % (i, istep)] = v.reshape(naux, p1 - p0, -1)
        int3c = v = None

    rhoj = scipy.linalg.cho_solve(int2c, rhoj.T).T
    int2c = None

    def load(set_id, p0, p1):
        '''Reassemble rows p0:p1 of the cached data for set_id as (p, occ, ao).'''
        nocc = orbo[set_id].shape[1]
        buf = numpy.empty((p1 - p0, nocc, nao))
        col1 = 0
        for istep in range(nsteps):
            dat = f_rhok['%s/%s' % (set_id, istep)][p0:p1]
            col0, col1 = col1, col1 + dat.shape[1]
            buf[:p1 - p0, :, col0:col1] = dat.transpose(0, 2, 1)
        return buf

    vj = numpy.zeros((nset, 3, nao, nao))
    vk = numpy.zeros((nset, 3, nao, nao))
    # (d/dX i,j|P)
    for shl0, shl1, nL in ao_ranges:
        int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        vj += numpy.einsum('xijp,np->nxij', int3c, rhoj[:, p0:p1])
        for i in range(nset):
            tmp = lib.einsum('xijp,jo->xipo', int3c, orbo[i])
            rhok = load(i, p0, p1)
            vk[i] += lib.einsum('xipo,pok->xik', tmp, rhok)
            tmp = rhok = None
        int3c = None

    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    # Size the blocks for the largest occupied space over all sets (the
    # previous code reused whatever `nocc` the last loop iteration left
    # behind); the floor of 1 avoids a division by zero.
    max_nocc = max(max(c.shape[1] for c in orbo), 1)
    blksize = int(min(max(max_memory * .5e6 / 8 / (nao * max_nocc), 20), naux))
    rhok_oo = []
    for i in range(nset):
        nocc = orbo[i].shape[1]
        tmp = numpy.empty((naux, nocc, nocc))
        for p0, p1 in lib.prange(0, naux, blksize):
            rhok = load(i, p0, p1)
            tmp[p0:p1] = lib.einsum('pok,kr->por', rhok, orbo[i])
        rhok_oo.append(tmp)
        rhok = tmp = None

    if mf_grad.auxbasis_response:
        vjaux = numpy.zeros((3, naux))
        vkaux = numpy.zeros((3, naux))
        # (i,j|d/dX P)
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            # Unpack the packed AO pair index into a full (nao, nao) block
            int3c = int3c.transpose(0, 2, 1).reshape(3 * (p1 - p0), -1)
            int3c = lib.unpack_tril(int3c)
            int3c = int3c.reshape(3, p1 - p0, nao, nao)
            vjaux[:, p0:p1] = numpy.einsum('xpij,mji,np->xp', int3c, dms,
                                           rhoj[:, p0:p1])
            for i in range(nset):
                tmp = rhok_oo[i][p0:p1]
                tmp = lib.einsum('por,ir->pio', tmp, orbo[i])
                tmp = lib.einsum('pio,jo->pij', tmp, orbo[i])
                vkaux[:, p0:p1] += lib.einsum('xpij,pij->xp', int3c, tmp)
        int3c = tmp = None

        # (d/dX P|Q)
        int2c_e1 = auxmol.intor('int2c2e_ip1')
        vjaux -= numpy.einsum('xpq,mp,nq->xp', int2c_e1, rhoj, rhoj)
        for i in range(nset):
            tmp = lib.einsum('pij,qij->pq', rhok_oo[i], rhok_oo[i])
            vkaux -= numpy.einsum('xpq,pq->xp', int2c_e1, tmp)

        # Sum the auxiliary functions belonging to each atom
        vjaux = [-vjaux[:, p0:p1].sum(axis=1) for p0, p1 in auxslices[:, 2:]]
        vkaux = [-vkaux[:, p0:p1].sum(axis=1) for p0, p1 in auxslices[:, 2:]]
        vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        vk = lib.tag_array(-vk.reshape(out_shape), aux=numpy.array(vkaux))
    else:
        vj = -vj.reshape(out_shape)
        vk = -vk.reshape(out_shape)
    return vj, vk
Example #5
0
 def tril_prange(start, stop, step):
     """Partition ``start..stop`` with ``balance_partition`` using i**2 costs.

     The cost table holds ``i**2`` for ``i = 0 .. stop`` and is passed,
     together with ``step``, ``start`` and ``stop``, straight to
     ``balance_partition``; its result is returned as is.
     """
     squared_costs = numpy.square(numpy.arange(stop + 1))
     return balance_partition(squared_costs, step, start, stop)
Example #6
0
def get_jk(mf_grad,
           mol=None,
           dm=None,
           hermi=0,
           with_j=True,
           with_k=True,
           ishf=True):
    '''Density-fitted Coulomb (vj) and exchange (vk) gradient intermediates.

    Args:
        mf_grad: gradients object. ``mf_grad.base`` supplies ``make_rdm1``,
            ``with_df``, ``mo_coeff`` and ``mo_occ``; ``mf_grad.max_memory``
            and ``mf_grad.auxbasis_response`` are read as well.
        mol: molecule; defaults to ``mf_grad.mol``.
        dm: one density matrix or a stack of them; defaults to
            ``mf_grad.base.make_rdm1()``. If it carries ``mo_coeff`` and
            ``mo_occ`` attributes those are used for the exchange part.
        hermi: unused.
        with_j: unused; vj is always computed.
        with_k: if False, only vj is built and ``(vj, None)`` is returned.
        ishf: if True, orbitals/occupations come from ``mf_grad.base``
            (unless ``dm`` carries them) and the auxiliary responses are
            reduced to one (natm, 3) array: vj summed over all pairs of
            density matrices, vk over the i == j pairs. If False, orbitals
            and occupations are obtained from a generalized eigenproblem of
            S.dm.S against the overlap S, and the auxiliary responses keep
            the shape (nset, nset, natm, 3).

    Returns:
        (vj, vk): arrays of shape ``dm.shape[:-2] + (3,) + dm.shape[-2:]``
        (sign already flipped). When ``mf_grad.auxbasis_response`` is true
        each is a tagged array whose ``aux`` attribute holds the auxiliary
        basis contribution summed per atom of ``auxmol``.
    '''
    # time.clock() was removed in Python 3.8; process_time() is the
    # documented replacement for CPU time.
    t0 = (time.process_time(), time.time())
    if mol is None: mol = mf_grad.mol
    if dm is None: dm = mf_grad.base.make_rdm1()

    with_df = mf_grad.base.with_df
    auxmol = with_df.auxmol
    if auxmol is None:
        auxmol = df.addons.make_auxmol(with_df.mol, with_df.auxbasis)
    ao_loc = mol.ao_loc
    nbas = mol.nbas
    nauxbas = auxmol.nbas

    get_int3c_s1 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's1')
    get_int3c_s2 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    get_int3c_ip1 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip1', 's1')
    get_int3c_ip2 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')

    nao = mol.nao
    naux = auxmol.nao
    dms = numpy.asarray(dm)
    out_shape = dms.shape[:-2] + (3, ) + dms.shape[-2:]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    # Fold dm + dm^T into a packed lower triangle; `idx` addresses the
    # diagonal elements in packed storage so they are counted only once.
    idx = numpy.arange(nao)
    idx = idx * (idx + 1) // 2 + idx
    dm_tril = dms + dms.transpose(0, 2, 1)
    dm_tril = lib.pack_tril(dm_tril)
    dm_tril[:, idx] *= .5

    auxslices = auxmol.aoslice_by_atom()
    aux_loc = auxmol.ao_loc
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    # Auxiliary-shell block length, clamped to [20, min(naux, 240)]
    blksize = int(min(max(max_memory * .5e6 / 8 / (nao**2 * 3), 20), naux,
                      240))
    ao_ranges = balance_partition(aux_loc, blksize)

    if not with_k:

        # (i,j|P)
        rhoj = numpy.empty((nset, naux))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_s2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            rhoj[:, p0:p1] = lib.einsum('wp,nw->np', int3c, dm_tril)
            int3c = None

        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1')
        # assume_a='pos' replaces the sym_pos=True keyword, which has been
        # removed from SciPy.
        rhoj = scipy.linalg.solve(int2c, rhoj.T, assume_a='pos').T
        int2c = None

        # (d/dX i,j|P)
        vj = numpy.zeros((nset, 3, nao, nao))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            vj += lib.einsum('xijp,np->nxij', int3c, rhoj[:, p0:p1])
            int3c = None

        if mf_grad.auxbasis_response:
            # (i,j|d/dX P)
            vjaux = numpy.empty((nset, nset, 3, naux))
            for shl0, shl1, nL in ao_ranges:
                int3c = get_int3c_ip2(
                    (0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
                p0, p1 = aux_loc[shl0], aux_loc[shl1]
                vjaux[:, :, :, p0:p1] = lib.einsum('xwp,mw,np->mnxp', int3c,
                                                   dm_tril, rhoj[:, p0:p1])
                int3c = None

            # (d/dX P|Q)
            int2c_e1 = auxmol.intor('int2c2e_ip1', aosym='s1')
            vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)

            # Sum the auxiliary functions of each atom -> (natm, nset, nset, 3)
            vjaux = numpy.array([
                -vjaux[:, :, :, p0:p1].sum(axis=3)
                for p0, p1 in auxslices[:, 2:]
            ])
            if ishf:
                vjaux = vjaux.sum((1, 2))
            else:
                vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        else:
            vj = -vj.reshape(out_shape)
        logger.timer(mf_grad, 'df vj', *t0)
        return vj, None

    if hasattr(dm, 'mo_coeff') and hasattr(dm, 'mo_occ'):
        mo_coeff = dm.mo_coeff
        mo_occ = dm.mo_occ
    elif ishf:
        mo_coeff = mf_grad.base.mo_coeff
        mo_occ = mf_grad.base.mo_occ
        if isinstance(mf_grad.base, scf.rohf.ROHF):
            # Treat ROHF as two sets sharing the same orbitals: one with
            # every occupied orbital, one with the doubly occupied only.
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
            assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))
    else:
        # Obtain orbitals and occupations for each density matrix from the
        # generalized eigenproblem (S dm S) c = n S c.
        s0 = mol.intor('int1e_ovlp')
        mo_occ = []
        mo_coeff = []
        for dm_i in dms:
            sdms = reduce(lib.dot, (s0, dm_i, s0))
            n, c = scipy.linalg.eigh(sdms, b=s0)
            mo_occ.append(n)
            mo_coeff.append(c)
        mo_occ = numpy.stack(mo_occ, axis=0)
    nmo = mo_occ.shape[-1]

    mo_coeff = numpy.asarray(mo_coeff).reshape(-1, nao, nmo)
    mo_occ = numpy.asarray(mo_occ).reshape(-1, nmo)
    rhoj = numpy.zeros((nset, naux))
    f_rhok = lib.H5TmpFile()
    # orbol: selected orbitals; orbor: the same orbitals scaled by their
    # occupation numbers. Both are kept per set and as growing stacks.
    orbor = []
    orbol = []
    nocc = []
    orbor_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    orbol_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    offs = 0
    for i in range(nset):
        # Keep every orbital whose occupation is not numerically zero
        idx = numpy.abs(mo_occ[i]) > 1e-8
        nocc.append(numpy.count_nonzero(idx))
        c = mo_coeff[i][:, idx]
        orbol_stack = numpy.append(orbol_stack, c, axis=1)
        orbol.append(orbol_stack[:, offs:offs + nocc[-1]])
        cn = lib.einsum('pi,i->pi', c, mo_occ[i][idx])
        orbor_stack = numpy.append(orbor_stack, cn, axis=1)
        orbor.append(orbor_stack[:, offs:offs + nocc[-1]])
        offs += nocc[-1]

    # (P|Q)
    int2c = scipy.linalg.cho_factor(auxmol.intor('int2c2e', aosym='s1'))

    t1 = (time.process_time(), time.time())
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = max_memory * .5e6 / 8 / (naux * nao)
    mol_ao_ranges = balance_partition(ao_loc, blksize)
    nsteps = len(mol_ao_ranges)
    t2 = t1
    for istep, (shl0, shl1, nd) in enumerate(mol_ao_ranges):
        int3c = get_int3c_s1((0, nbas, shl0, shl1, 0, nauxbas))
        t2 = logger.timer_debug1(mf_grad, 'df grad intor (P|mn)', *t2)
        p0, p1 = ao_loc[shl0], ao_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            v = lib.dot(int3c.reshape(nao, -1, order='F').T,
                        orbor[i]).reshape(naux, (p1 - p0) * nocc[i])
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad einsum (P|mn) u_ni N_i = v_Pmi',
                                     *t2)
            rhoj[i] += numpy.dot(v, orbol[i][p0:p1].ravel())
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad einsum v_Pmi u_mi = rho_P', *t2)
            v = scipy.linalg.cho_solve(int2c, v)
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad cho_solve (P|Q) D_Qmi = v_Pmi',
                                     *t2)
            # Cache the half-transformed, fitted block on disk
            f_rhok['%s/%s' % (i, istep)] = v.reshape(naux, p1 - p0, -1)
            t2 = logger.timer_debug1(
                mf_grad,
                'df grad cache D_Pmi (m <-> i transpose upon retrieval)', *t2)
        int3c = v = None

    rhoj = scipy.linalg.cho_solve(int2c, rhoj.T).T
    int2c = None
    t1 = logger.timer_debug1(
        mf_grad, 'df grad vj and vk AO (P|Q) D_Q = (P|mn) D_mn solve', *t1)

    def load(set_id, p0, p1):
        '''Reassemble rows p0:p1 of the cached data for set_id as (p, occ, ao).'''
        buf = numpy.empty((p1 - p0, nocc[set_id], nao))
        col1 = 0
        for istep in range(nsteps):
            dat = f_rhok['%s/%s' % (set_id, istep)][p0:p1]
            col0, col1 = col1, col1 + dat.shape[1]
            buf[:p1 - p0, :, col0:col1] = dat.transpose(0, 2, 1)
        return buf

    vj = numpy.zeros((nset, 3, nao, nao))
    vk = numpy.zeros((nset, 3, nao, nao))
    # (d/dX i,j|P)
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s1  # MO output index slower than AO output index; input AOs are asymmetric
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s1  # input is not tril_packed
    null = lib.c_null_ptr()
    t2 = t1
    for shl0, shl1, nL in ao_ranges:
        int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0,
                               shl1)).transpose(0, 3, 2,
                                                1)  # (P|mn'), row-major order
        t2 = logger.timer_debug1(mf_grad, "df grad intor (P|mn')", *t2)
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            vj[i, 0] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[0].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            vj[i, 1] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[1].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            vj[i, 2] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[2].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum rho_P (P|mn') rho_P", *t2)
            tmp = numpy.empty((3, p1 - p0, nocc[i], nao),
                              dtype=orbol_stack.dtype)
            fdrv(
                ftrans,
                fmmm,  # xPmn u_mi -> xPin
                tmp.ctypes.data_as(ctypes.c_void_p),
                int3c.ctypes.data_as(ctypes.c_void_p),
                orbol[i].ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(3 * (p1 - p0)),
                ctypes.c_int(nao),
                (ctypes.c_int * 4)(0, nocc[i], 0, nao),
                null,
                ctypes.c_int(0))
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum (P|mn') u_mi = dg_Pin",
                                     *t2)
            rhok = load(i, p0, p1)
            vk[i] += lib.einsum('xpoi,pok->xik', tmp, rhok)
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum D_Pim dg_Pin = v_ij", *t2)
            rhok = tmp = None
        int3c = None
    t1 = logger.timer_debug1(mf_grad, 'df grad vj and vk AO (P|mn) D_P eval',
                             *t1)

    if mf_grad.auxbasis_response:
        # Cache (P|uv) D_ui c_vj. Must include both upper and lower triangles
        # over nset.
        max_memory = mf_grad.max_memory - lib.current_memory()[0]
        blksize = int(
            min(max(max_memory * .5e6 / 8 / (nao * max(nocc)), 20), naux))
        rhok_oo = []
        for i, j in product(range(nset), repeat=2):
            tmp = numpy.empty((naux, nocc[i], nocc[j]))
            for p0, p1 in lib.prange(0, naux, blksize):
                rhok = load(i, p0, p1).reshape((p1 - p0) * nocc[i], nao)
                tmp[p0:p1] = lib.dot(rhok,
                                     orbol[j]).reshape(p1 - p0, nocc[i],
                                                       nocc[j])
            rhok_oo.append(tmp)
            rhok = tmp = None
        t1 = logger.timer_debug1(
            mf_grad, 'df grad vj and vk aux d_Pim u_mj = d_Pij eval', *t1)

        vjaux = numpy.zeros((nset, nset, 3, naux))
        vkaux = numpy.zeros((nset, nset, 3, naux))
        # (i,j|d/dX P)
        t2 = t1
        fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2  # MO output index slower than AO output index; input AOs are symmetric
        fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
        ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2  # input is tril_packed
        null = lib.c_null_ptr()
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            t2 = logger.timer_debug1(mf_grad, "df grad intor (P'|mn)", *t2)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            drhoj = lib.dot(
                int3c.transpose(0, 2, 1).reshape(3 * (p1 - p0), -1),
                dm_tril.T).reshape(3, p1 - p0, -1)  # xpij,mij->xpm
            vjaux[:, :, :, p0:p1] = lib.einsum('xpm,np->mnxp', drhoj,
                                               rhoj[:, p0:p1])
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum rho_P (P'|mn) D_mn = v_P", *t2)
            tmp = [
                numpy.empty((3, p1 - p0, nocc_i, nao), dtype=orbor_stack.dtype)
                for nocc_i in nocc
            ]
            assert (orbor_stack.flags.f_contiguous), '{} {}'.format(
                orbor_stack.shape, orbor_stack.strides)
            for orb, buf, nocc_i in zip(orbol, tmp, nocc):
                fdrv(
                    ftrans,
                    fmmm,  # gPmn u_ni -> gPim
                    buf.ctypes.data_as(ctypes.c_void_p),
                    int3c.ctypes.data_as(ctypes.c_void_p),
                    orb.ctypes.data_as(ctypes.c_void_p),
                    ctypes.c_int(3 * (p1 - p0)),
                    ctypes.c_int(nao),
                    (ctypes.c_int * 4)(0, nocc_i, 0, nao),
                    null,
                    ctypes.c_int(0))
            int3c = [[
                lib.dot(buf.reshape(-1, nao),
                        orb).reshape(3, p1 - p0, -1, norb)
                for orb, norb in zip(orbor, nocc)
            ] for buf in tmp]  # pim,mj,j -> pij
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum (P'|mn) u_mi u_nj N_j = v_Pmn", *t2)
            for i, j in product(range(nset), repeat=2):
                # rhok_oo is stored flat over (i, j) in row-major order
                k = (i * nset) + j
                tmp = rhok_oo[k][p0:p1]
                vkaux[i, j, :, p0:p1] += lib.einsum('xpij,pij->xp',
                                                    int3c[i][j], tmp)
                t2 = logger.timer_debug1(mf_grad,
                                         "df grad einsum d_Pij v_Pij = v_P",
                                         *t2)
        int3c = tmp = None
        t1 = logger.timer_debug1(mf_grad, "df grad vj and vk aux (P'|mn) eval",
                                 *t1)

        # (d/dX P|Q)
        int2c_e1 = auxmol.intor('int2c2e_ip1')
        vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)
        for i, j in product(range(nset), repeat=2):
            k = (i * nset) + j
            l = (j * nset) + i
            tmp = lib.einsum('pij,qji->pq', rhok_oo[k], rhok_oo[l])
            vkaux[i, j] -= lib.einsum('xpq,pq->xp', int2c_e1, tmp)
        t1 = logger.timer_debug1(mf_grad, "df grad vj and vk aux (P'|Q) eval",
                                 *t1)

        # Sum the auxiliary functions of each atom -> (natm, nset, nset, 3)
        vjaux = numpy.array([
            -vjaux[:, :, :, p0:p1].sum(axis=3) for p0, p1 in auxslices[:, 2:]
        ])
        vkaux = numpy.array([
            -vkaux[:, :, :, p0:p1].sum(axis=3) for p0, p1 in auxslices[:, 2:]
        ])
        if ishf:
            vjaux = vjaux.sum((1, 2))
            # Only the i == j blocks contribute to exchange. Index the
            # (natm, nset, nset, 3) array directly: reshaping it to
            # (nset**2, 3, natm) without a transpose scrambled the data and
            # returned (3, natm) instead of (natm, 3).
            diag = numpy.arange(nset)
            vkaux = vkaux[:, diag, diag, :].sum(1)
        else:
            vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vkaux = numpy.ascontiguousarray(vkaux.transpose(1, 2, 0, 3))
        vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        vk = lib.tag_array(-vk.reshape(out_shape), aux=numpy.array(vkaux))
    else:
        vj = -vj.reshape(out_shape)
        vk = -vk.reshape(out_shape)
    logger.timer(mf_grad, 'df grad vj and vk', *t0)
    return vj, vk
Example #7
0
def grad_elec_dferi (mc_grad, mo_cas=None, ci=None, dfcasdm2=None, casdm2=None, atmlst=None, max_memory=None):
    ''' Evaluate the (P|i'j) d_Pij contribution to the electronic gradient, where d_Pij is the
    DF-2RDM obtained by solve_df_rdm2. The caller must symmetrize (i.e., [(P|i'j) + (P|ij')] d_Pij / 2)
    if necessary.

    Args:
        mc_grad: MC-SCF gradients method object (or the MC-SCF object itself)

    Kwargs:
        mo_cas: ndarray, list, or tuple containing active-space MO coefficients
            If a tuple of length 2, the same pair of MO sets are assumed to apply to
            the internally-contracted and externally-contracted indices of the DF-2rdm:
            (P|Q)d_Qij = (P|kl)d_ijkl -> (P|Q)d_Qij = (P|ij)d_ijij
            If a tuple of length 4, the 4 MO sets are applied to ijkl above in that order
            (first two external, last two internal).
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if dfcasdm2 is provided.
        dfcasdm2: ndarray, tuple, or list containing DF-2rdm in mo_cas basis.
            Computed by solve_df_rdm2 if omitted.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Passed on to solve_df_rdm2 when dfcasdm2 is omitted.
        atmlst: list of integers
            List of nonfrozen atoms, as in grad_elec functions.
            Defaults to list (range (mol.natm)).
            NOTE: accepted for interface compatibility but not applied;
            the result always covers every atom of mol.
        max_memory: int
            Maximum memory usage in MB

    Returns:
        dE: ndarray of shape (len (dfcasdm2), number of atoms in mol, 3)

    Raises:
        RuntimeError: if mo_cas is neither a 2D ndarray nor a sequence of length 2 or 4 '''
    if isinstance (mc_grad, GradientsBasics):
        mc = mc_grad.base
    else:
        mc = mc_grad
    mol = mc_grad.mol
    auxmol = mc.with_df.auxmol
    ncore, ncas, nao, naux, nbas = mc.ncore, mc.ncas, mol.nao, auxmol.nao, mol.nbas
    nocc = ncore + ncas
    if mo_cas is None: mo_cas = mc.mo_coeff[:,ncore:nocc]
    if max_memory is None: max_memory = mc_grad.max_memory
    if isinstance (mo_cas, np.ndarray) and mo_cas.ndim == 2:
        mo_cas = (mo_cas,)*4
    elif len (mo_cas) == 2:
        mo_cas = (mo_cas[0], mo_cas[1], mo_cas[0], mo_cas[1])
    elif len (mo_cas) == 4:
        mo_cas = tuple (mo_cas)
    else:
        # mo_cas may be a plain list/tuple with no .shape attribute; fall back
        # to its length so the intended RuntimeError is raised rather than an
        # AttributeError.
        raise RuntimeError ('Invalid shape of np.asarray (mo_cas): {}'.format (
            getattr (mo_cas, 'shape', (len (mo_cas),))))
    nmo = [mo.shape[1] for mo in mo_cas]
    if atmlst is None: atmlst = list (range (mol.natm))
    if ci is None: ci = mc.ci
    if dfcasdm2 is None: dfcasdm2 = solve_df_rdm2 (mc, mo_cas=mo_cas[2:], ci=ci, casdm2=casdm2) # d_Pij
    nset = len (dfcasdm2)
    dE = np.zeros ((nset, nao, 3))
    dfcasdm2 = np.array (dfcasdm2)

    # Set up (P|u'v) calculation
    get_int3c = _int3c_wrapper(mol, auxmol, 'int3c2e_ip1', 's1')
    max_memory -= lib.current_memory()[0]
    # Per-auxiliary-function footprint of int3c, intbuf and dm2buf
    blklen = nao*((3*nao) + (3*nmo[1]) + (nset*nmo[1]))
    blksize = int (min (max (max_memory * 1e6 / 8 / blklen, 20), 240))
    aux_loc = auxmol.ao_loc
    aux_ranges = balance_partition(aux_loc, blksize)

    # Iterate over auxbasis range
    for shl0, shl1, nL in aux_ranges:
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        int3c = get_int3c ((0, nbas, 0, nbas, shl0, shl1))  # (u'v|P); shape = (3,nao,nao,p1-p0)
        # Derivative on the index contracted with mo_cas[0]
        intbuf = lib.einsum ('xuvp,vj->xupj', int3c, mo_cas[1])
        dm2buf = lib.einsum ('ui,npij->nupj', mo_cas[0], dfcasdm2[:,p0:p1,:,:])
        dE -= np.einsum ('nupj,xupj->nux', dm2buf, intbuf)
        intbuf = dm2buf = None
        # Derivative on the index contracted with mo_cas[1]
        intbuf = lib.einsum ('xuvp,vj->xupj', int3c, mo_cas[0])
        dm2buf = lib.einsum ('uj,npij->nupi', mo_cas[1], dfcasdm2[:,p0:p1,:,:])
        dE -= np.einsum ('nupj,xupj->nux', dm2buf, intbuf)
        intbuf = dm2buf = int3c = None

    # Sum the AO contributions belonging to each atom
    aoslices = mol.aoslice_by_atom ()
    dE = np.array ([dE[:,p0:p1].sum (axis=1) for p0, p1 in aoslices[:,2:]]).transpose (1,0,2)
    return np.ascontiguousarray (dE)
Example #8
0
def grad_elec_auxresponse_dferi (mc_grad, mo_cas=None, ci=None, dfcasdm2=None, casdm2=None, atmlst=None, max_memory=None, dferi=None, incl_2c=True):
    ''' Evaluate the [(P'|ij) + (P'|Q) g_Qij] d_Pij contribution to the electronic gradient, where d_Pij is
    the DF-2RDM obtained by solve_df_rdm2 and g_Qij solves (P|Q) g_Qij = (P|ij). The caller must symmetrize
    if necessary (i.e., (P|Q) d_Qij = (P|kl) d_ijkl <-> (P|Q) d_Qkl = (P|ij) d_ijkl in order to get at Q').

    Args:
        mc_grad: MC-SCF gradients method object (or the MC-SCF object itself)

    Kwargs:
        mo_cas: ndarray, list, or tuple containing active-space MO coefficients
            If a tuple of length 2, the same pair of MO sets are assumed to apply to
            the internally-contracted and externally-contracted indices of the DF-2rdm:
            (P|Q)d_Qij = (P|kl)d_ijkl -> (P|Q)d_Qij = (P|ij)d_ijij
            If a tuple of length 4, the 4 MO sets are applied to ijkl above in that order
            (first two external, last two internal).
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if dfcasdm2 is provided.
        dfcasdm2: ndarray, tuple, or list containing DF-2rdm in mo_cas basis.
            Computed by solve_df_rdm2 if omitted.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Passed on to solve_df_rdm2 when dfcasdm2 is omitted.
        atmlst: list of integers
            List of nonfrozen atoms, as in grad_elec functions.
            Defaults to list (range (mol.natm)).
            NOTE: accepted for interface compatibility but not applied;
            the result always covers every atom of auxmol.
        max_memory: int
            Maximum memory usage in MB
        dferi: ndarray containing g_Pij for optional precalculation
        incl_2c: bool
            If False, omit the terms depending on (P'|Q)

    Returns:
        dE: ndarray of shape (len (dfcasdm2), number of atoms in auxmol, 3)

    Raises:
        RuntimeError: if mo_cas is neither a 2D ndarray nor a sequence of length 2 or 4 '''

    if isinstance (mc_grad, GradientsBasics):
        mc = mc_grad.base
    else:
        mc = mc_grad
    mol = mc_grad.mol
    auxmol = mc.with_df.auxmol
    ncore, ncas, nao, naux, nbas = mc.ncore, mc.ncas, mol.nao, auxmol.nao, mol.nbas
    nocc = ncore + ncas
    npair = nao * (nao + 1) // 2
    if mo_cas is None: mo_cas = mc.mo_coeff[:,ncore:nocc]
    if max_memory is None: max_memory = mc.max_memory
    if isinstance (mo_cas, np.ndarray) and mo_cas.ndim == 2:
        mo_cas = (mo_cas,)*4
    elif len (mo_cas) == 2:
        mo_cas = (mo_cas[0], mo_cas[1], mo_cas[0], mo_cas[1])
    elif len (mo_cas) == 4:
        mo_cas = tuple (mo_cas)
    else:
        # mo_cas may be a plain list/tuple with no .shape attribute; fall back
        # to its length so the intended RuntimeError is raised rather than an
        # AttributeError.
        raise RuntimeError ('Invalid shape of np.asarray (mo_cas): {}'.format (
            getattr (mo_cas, 'shape', (len (mo_cas),))))
    nmo = [mo.shape[1] for mo in mo_cas]
    if atmlst is None: atmlst = list (range (mol.natm))
    if ci is None: ci = mc.ci
    if dfcasdm2 is None: dfcasdm2 = solve_df_rdm2 (mc, mo_cas=mo_cas[2:], ci=ci, casdm2=casdm2) # d_Pij = (P|Q)^{-1} (Q|kl) d_ijkl
    nset = len (dfcasdm2)
    dE = np.zeros ((nset, naux, 3))
    dfcasdm2 = np.array (dfcasdm2)

    # Shape dfcasdm2
    mosym, nmo_pair, mo_conc, mo_slice = _conc_mos(mo_cas[0], mo_cas[1], compact=True)
    if 's2' in mosym:
        assert (nmo[0] == nmo[1]), 'How did I get {} with nmo[0] = {} and nmo[1] = {}'.format (mosym, nmo[0], nmo[1])
        dfcasdm2 = dfcasdm2.reshape (nset*naux, nmo[0], nmo[1])
        # Out-of-place sum: avoids adding an array to an overlapping
        # transposed view of itself in place.
        dfcasdm2 = dfcasdm2 + dfcasdm2.transpose (0,2,1)
        # Positions of the diagonal elements in packed lower-triangular storage
        diag_idx = np.arange(nmo[0])
        diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
        dfcasdm2 = lib.pack_tril (np.ascontiguousarray (dfcasdm2))
        # The diagonal was doubled by the symmetrization above
        dfcasdm2[:,diag_idx] *= 0.5
    dfcasdm2 = dfcasdm2.reshape (nset, naux, nmo_pair)

    # Do 2c part. Assume memory is no object
    if incl_2c:
        int2c = auxmol.intor('int2c2e_ip1')
        if (dferi is None): dferi = solve_df_eri (mc, mo_cas=mo_cas[:2]).reshape (naux, nmo_pair) # g_Pij = (P|Q)^{-1} (Q|ij)
        int3c = np.dot (int2c, dferi) # (P'|Q) g_Qij
        dE += lib.einsum ('npi,xpi->npx', dfcasdm2, int3c) # d_Pij (P'|Q) g_Qij
        int2c = int3c = dferi = None

    # Set up 3c part
    get_int3c = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')
    max_memory -= lib.current_memory()[0]
    blklen = 6*npair
    blksize = int (min (max (max_memory * 1e6 / 8 / blklen, 20), 240))
    aux_loc = auxmol.ao_loc
    aux_ranges = balance_partition(aux_loc, blksize)

    # Iterate over auxbasis range and do 3c part
    for shl0, shl1, nL in aux_ranges:
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        int3c = get_int3c ((0, nbas, 0, nbas, shl0, shl1))  # (uv|P'); shape = (3,npair,p1-p0)
        # Bring the AO-pair index last and flatten (component, aux) into rows
        int3c = np.ascontiguousarray (int3c.transpose (0,2,1).reshape (3*(p1-p0), npair))
        int3c = _ao2mo.nr_e2(int3c, mo_conc, mo_slice, aosym='s2', mosym=mosym)
        int3c = int3c.reshape (3,p1-p0,nmo_pair)
        int3c = np.ascontiguousarray (int3c)
        dE[:,p0:p1,:] -= lib.einsum ('npi,xpi->npx', dfcasdm2[:,p0:p1,:], int3c)

    # Ravel to atoms
    auxslices = auxmol.aoslice_by_atom ()
    dE = np.array ([dE[:,p0:p1].sum (axis=1) for p0, p1 in auxslices[:,2:]]).transpose (1,0,2)
    return np.ascontiguousarray (dE)