Ejemplo n.º 1
0
    def add_wvvVV_(self, t1, t2, eris, t2new_tril):
        '''Add the vvvv contraction of tau to ``t2new_tril``.

        Following the reference einsum comments in the body, tau is
        ``t2 + t1 (x) t1`` restricted to occupied pairs with j <= i (packed
        along the first axis) and is contracted with ``eris.vvvv`` one
        virtual index ``a`` at a time.

        Args:
            t1: 2D array whose shape defines (nocc, nvir).
            t2: array indexed as ``t2[i,:i+1]``.
            eris: object exposing ``vvvv``; rows ``p0:p0+a+1`` are unpacked
                with ``_ccsd.unpack_tril`` for each ``a``.
            t2new_tril: accumulation target handed to ``_dgemm`` as the
                output operand (viewed as (-1, nvir*nvir)).

        Returns:
            ``t2new_tril``.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        time0 = logger.process_clock(), logger.perf_counter()
        nocc, nvir = t1.shape
        npair = nocc*(nocc+1)//2
        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        tau = numpy.empty((npair,nvir,nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
            tau[p0:p0+i+1] += t2[i,:i+1]
            p0 += i + 1
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        p0 = 0
        # Scratch space reused for the unpacked integrals of every ``a``.
        outbuf = numpy.empty((nvir,nvir,nvir))
        for a in range(nvir):
            buf = _ccsd.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf[:a+1])
            #: t2new_tril[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,:a+1], buf)
            lib.numpy_helper._dgemm('N', 'N', npair, nvir, (a+1)*nvir,
                                    tau.reshape(-1,nvir*nvir), buf.reshape(-1,nvir),
                                    t2new_tril.reshape(-1,nvir*nvir), 1, 1,
                                    0, 0, a*nvir)

            #: t2new_tril[i,:i+1,:a] += numpy.einsum('xd,abd->xab', tau[:,a], buf[:a])
            if a > 0:
                lib.numpy_helper._dgemm('N', 'T', npair, a*nvir, nvir,
                                        tau.reshape(-1,nvir*nvir), buf.reshape(-1,nvir),
                                        t2new_tril.reshape(-1,nvir*nvir), 1, 1,
                                        a*nvir, 0, 0)
            p0 += a+1
            time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
        return t2new_tril
Ejemplo n.º 2
0
    def add_wvvVV_(self, t1, t2, eris, t2new_tril, max_memory=2000):
        '''Add the vvvv contraction of tau to ``t2new_tril``.

        Following the reference einsum comments in the body, tau is
        ``t2 + t1 (x) t1`` restricted to occupied pairs with j <= i (packed
        along the first axis) and is contracted with ``eris.vvvv`` one
        virtual index ``a`` at a time.

        Args:
            t1: 2D array whose shape defines (nocc, nvir).
            t2: array indexed as ``t2[i,:i+1]``.
            eris: object exposing ``vvvv``; rows ``p0:p0+a+1`` are unpacked
                with ``_ccsd.unpack_tril`` for each ``a``.
            t2new_tril: accumulation target handed to ``_dgemm`` as the
                output operand (viewed as (-1, nvir*nvir)).
            max_memory: accepted for interface compatibility; not read by
                this implementation.

        Returns:
            ``t2new_tril``.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        time0 = logger.process_clock(), logger.perf_counter()
        nocc, nvir = t1.shape
        npair = nocc*(nocc+1)//2
        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        tau = numpy.empty((npair,nvir,nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
            tau[p0:p0+i+1] += t2[i,:i+1]
            p0 += i + 1
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        p0 = 0
        # Scratch space reused for the unpacked integrals of every ``a``.
        outbuf = numpy.empty((nvir,nvir,nvir))
        for a in range(nvir):
            buf = _ccsd.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf[:a+1])
            #: t2new_tril[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,:a+1], buf)
            lib.numpy_helper._dgemm('N', 'N', npair, nvir, (a+1)*nvir,
                                    tau.reshape(-1,nvir*nvir), buf.reshape(-1,nvir),
                                    t2new_tril.reshape(-1,nvir*nvir), 1, 1,
                                    0, 0, a*nvir)

            #: t2new_tril[i,:i+1,:a] += numpy.einsum('xd,abd->xab', tau[:,a], buf[:a])
            if a > 0:
                lib.numpy_helper._dgemm('N', 'T', npair, a*nvir, nvir,
                                        tau.reshape(-1,nvir*nvir), buf.reshape(-1,nvir),
                                        t2new_tril.reshape(-1,nvir*nvir), 1, 1,
                                        a*nvir, 0, 0)
            p0 += a+1
            time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
        return t2new_tril
Ejemplo n.º 3
0
    def _make_shared(self):
        '''Build the intermediates stored as Foo, Fvv, Fov, Wovvo and Woovv.

        Reads ``self.t1``, ``self.t2`` and ``self.eris``, stores the results
        as attributes, sets ``self._made_shared`` and returns ``self``.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        t1, t2, eris = self.t1, self.t2, self.eris
        self.Foo = imd.Foo(t1, t2, eris)
        self.Fvv = imd.Fvv(t1, t2, eris)
        self.Fov = imd.Fov(t1, t2, eris)

        # 2 virtuals
        self.Wovvo = imd.Wovvo(t1, t2, eris)
        self.Woovv = eris.oovv

        self._made_shared = True
        logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *cput0)
        return self
Ejemplo n.º 4
0
    def _make_shared(self):
        '''Populate Foo, Fvv, Fov, Wovvo and Woovv on this object.

        The first four are produced by the same-named ``imd`` functions
        called with (t1, t2, eris); Woovv is taken from ``eris.oovv``.
        Sets ``self._made_shared`` and returns ``self``.
        '''
        started = (logger.process_clock(), logger.perf_counter())

        imd_args = (self.t1, self.t2, self.eris)
        # Wovvo is the "2 virtuals" intermediate; it is built last, as before.
        for label in ('Foo', 'Fvv', 'Fov', 'Wovvo'):
            setattr(self, label, getattr(imd, label)(*imd_args))
        self.Woovv = imd_args[2].oovv

        self._made_shared = True
        logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *started)
        return self
Ejemplo n.º 5
0
    def make_t3p2_ea(self, cc):
        '''Rebuild the EA intermediates from the amplitudes returned by
        ``imd.get_t3p2_imds_slow`` and add its Wvvvo term to ``self.Wvvvo``.

        Args:
            cc: object providing ``t1`` and ``t2``; also forwarded to
                ``imd.get_t3p2_imds_slow``.

        Returns:
            ``self``, with ``t1``/``t2`` replaced and ``made_ea_imds`` set.
        '''
        started = (logger.process_clock(), logger.perf_counter())

        t3p2_imds = imd.get_t3p2_imds_slow(cc, cc.t1, cc.t2, self.eris)
        # Only the amplitudes and the Wvvvo term are used here.
        _ecorr, pt1, pt2, _wovoo, wvvvo_term = t3p2_imds
        self.t1 = pt1
        self.t2 = pt2

        # Force update: flag the shared intermediates as stale, then rebuild
        # with the new t1/t2 before adding the extra term.
        self._made_shared = False
        self.make_ea()
        self.Wvvvo = self.Wvvvo + wvvvo_term

        self.made_ea_imds = True
        logger.timer_debug1(self, 'EOM-CCSD(T)a EA intermediates', *started)
        return self
Ejemplo n.º 6
0
    def make_ea(self):
        '''Build the EA intermediates Wvovv, Wvvvv and Wvvvo.

        Calls ``self._make_shared()`` first when ``self._made_shared`` is
        false. Sets ``self.made_ea_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        t1, t2, eris = self.t1, self.t2, self.eris

        # 3 or 4 virtuals
        self.Wvovv = imd.Wvovv(t1, t2, eris)
        self.Wvvvv = imd.Wvvvv(t1, t2, eris)
        # Wvvvo receives the Wvvvv just built as an extra argument.
        self.Wvvvo = imd.Wvvvo(t1, t2, eris, self.Wvvvv)

        self.made_ea_imds = True
        logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *cput0)
        return self
Ejemplo n.º 7
0
    def make_ip(self):
        '''Populate the IP intermediates Woooo, Wooov and Wovoo.

        Shared intermediates are built first if ``self._made_shared`` is
        false. Sets ``self.made_ip_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        started = (logger.process_clock(), logger.perf_counter())

        imd_args = (self.t1, self.t2, self.eris)

        # 0 or 1 virtuals
        for label in ('Woooo', 'Wooov', 'Wovoo'):
            setattr(self, label, getattr(imd, label)(*imd_args))

        self.made_ip_imds = True
        logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *started)
        return self
Ejemplo n.º 8
0
    def make_t3p2_ip(self, cc):
        '''Rebuild the IP intermediates from the amplitudes returned by
        ``imd.get_t3p2_imds_slow`` and add its Wovoo term to ``self.Wovoo``.

        Args:
            cc: object providing ``t1`` and ``t2``; also forwarded to
                ``imd.get_t3p2_imds_slow``.

        Returns:
            ``self``, with ``t1``/``t2`` replaced and ``made_ip_imds`` set.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        t1, t2, eris = cc.t1, cc.t2, self.eris
        delta_E_corr, pt1, pt2, Wovoo, Wvvvo = \
            imd.get_t3p2_imds_slow(cc, t1, t2, eris)
        self.t1 = pt1
        self.t2 = pt2

        self._made_shared = False  # Force update
        self.make_ip()  # Make after t1/t2 updated
        self.Wovoo = self.Wovoo + Wovoo

        self.made_ip_imds = True
        logger.timer_debug1(self, 'EOM-CCSD(T)a IP intermediates', *cput0)
        return self
Ejemplo n.º 9
0
    def make_ip(self):
        '''Build the IP intermediates Woooo, Wooov and Wovoo.

        Calls ``self._make_shared()`` first when ``self._made_shared`` is
        false. Sets ``self.made_ip_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        t1, t2, eris = self.t1, self.t2, self.eris

        # 0 or 1 virtuals
        self.Woooo = imd.Woooo(t1, t2, eris)
        self.Wooov = imd.Wooov(t1, t2, eris)
        self.Wovoo = imd.Wovoo(t1, t2, eris)

        self.made_ip_imds = True
        logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *cput0)
        return self
Ejemplo n.º 10
0
    def make_ea(self):
        '''Populate the EA intermediates Wvovv, Wvvvv and Wvvvo.

        Shared intermediates are built first if ``self._made_shared`` is
        false. Sets ``self.made_ea_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        started = (logger.process_clock(), logger.perf_counter())

        amp1, amp2, ints = self.t1, self.t2, self.eris

        # 3 or 4 virtuals
        for label in ('Wvovv', 'Wvvvv'):
            setattr(self, label, getattr(imd, label)(amp1, amp2, ints))
        # Wvvvo additionally takes the Wvvvv stored just above.
        self.Wvvvo = imd.Wvvvo(amp1, amp2, ints, self.Wvvvv)

        self.made_ea_imds = True
        logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *started)
        return self
Ejemplo n.º 11
0
    def make_ip(self):
        '''Build the IP intermediates Woooo, Wooov and Wovoo.

        Each ``imd`` function is called with ``self._cc``, the amplitudes,
        ``self.eris`` and ``self.kconserv``. Calls ``self._make_shared()``
        first when ``self._made_shared`` is false. Sets
        ``self.made_ip_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        kconserv = self.kconserv
        t1, t2, eris = self.t1, self.t2, self.eris

        # 0 or 1 virtuals
        self.Woooo = imd.Woooo(self._cc, t1, t2, eris, kconserv)
        self.Wooov = imd.Wooov(self._cc, t1, t2, eris, kconserv)
        self.Wovoo = imd.Wovoo(self._cc, t1, t2, eris, kconserv)

        self.made_ip_imds = True
        logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *cput0)
        return self
Ejemplo n.º 12
0
    def _make_shared(self):
        '''Build the intermediates stored as Foo, Fvv, Fov, Wovvo and Woovv.

        Each ``imd`` function is called with ``self._cc``, the amplitudes,
        ``self.eris`` and ``self.kconserv``; Woovv is taken from
        ``eris.oovv``. Sets ``self._made_shared`` and returns ``self``.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        kconserv = self.kconserv
        t1, t2, eris = self.t1, self.t2, self.eris

        self.Foo = imd.Foo(self._cc, t1, t2, eris, kconserv)
        self.Fvv = imd.Fvv(self._cc, t1, t2, eris, kconserv)
        self.Fov = imd.Fov(self._cc, t1, t2, eris, kconserv)

        # 2 virtuals
        self.Wovvo = imd.Wovvo(self._cc, t1, t2, eris, kconserv)
        self.Woovv = eris.oovv

        self._made_shared = True
        logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *cput0)
        return self
Ejemplo n.º 13
0
    def build(self, j_only=None, with_j3c=True, kpts_band=None):
        '''Set up the auxiliary cell and k-point pair list, and optionally
        generate the 3-index integrals through ``self._make_j3c``.

        Args:
            j_only: when None, ``self._j_only`` is used. If true, only
                diagonal (k, k) pairs are listed; otherwise the pairs
                (k_i, k_j) with j <= i plus the band k-point pairs.
            with_j3c: when true, resolve the output file name from
                ``self._cderi_to_save`` and call ``self._make_j3c``.
            kpts_band: extra band k-points, reshaped to (-1, 3) and merged
                into ``self.kpts_band``.

        Returns:
            ``self``.
        '''
        if self.kpts_band is not None:
            self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
        if kpts_band is not None:
            kpts_band = numpy.reshape(kpts_band, (-1, 3))
            if self.kpts_band is None:
                self.kpts_band = kpts_band
            else:
                self.kpts_band = unique(
                    numpy.vstack((self.kpts_band, kpts_band)))[0]

        self.check_sanity()
        self.dump_flags()

        self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                         self.exp_to_discard)

        kpts = self.kpts
        if self.kpts_band is None:
            kband_uniq = numpy.zeros((0, 3))
        else:
            # Keep only the band k-points that are not already in kpts.
            kband_uniq = [
                k for k in self.kpts_band if len(member(k, kpts)) == 0
            ]
        if j_only is None:
            j_only = self._j_only
        if j_only:
            kall = numpy.vstack([kpts, kband_uniq])
            kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
        else:
            kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts)
                         for j in range(i + 1)]
            kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
            kptij_lst.extend([(ki, ki) for ki in kband_uniq])
            kptij_lst = numpy.asarray(kptij_lst)

        if with_j3c:
            if isinstance(self._cderi_to_save, str):
                cderi = self._cderi_to_save
            else:
                cderi = self._cderi_to_save.name
            if isinstance(self._cderi, str):
                if self._cderi == cderi and os.path.isfile(cderi):
                    logger.warn(
                        self, 'DF integrals in %s (specified by '
                        '._cderi) is overwritten by GDF '
                        'initialization. ', cderi)
                else:
                    logger.warn(
                        self, 'Value of ._cderi is ignored. '
                        'DF integrals will be saved in file %s .', cderi)
            self._cderi = cderi
            # time.clock() was removed in Python 3.8; use the logger's clocks.
            t1 = (logger.process_clock(), logger.perf_counter())
            self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
            t1 = logger.timer_debug1(self, 'j3c', *t1)
        return self
Ejemplo n.º 14
0
def get_gridss(mol, level=1, gthrd=1e-10):
    '''Build a DFT grid for ``mol`` and screen out negligible points.

    A grid point is kept when at least one weighted AO value
    (``GTOval`` times the grid weight) exceeds ``gthrd`` in magnitude.

    Args:
        mol: molecule object; must provide ``eval_gto``.
        level: value assigned to ``grids.level`` before ``grids.build()``.
        gthrd: screening threshold on the weighted AO values.

    Returns:
        The ``Grids`` object with ``coords`` and ``weights`` filtered.
    '''
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    Ktime = (logger.process_clock(), logger.perf_counter())
    grids = dft.gen_grid.Grids(mol)
    grids.level = level
    grids.build()

    wao_v0 = mol.eval_gto('GTOval', grids.coords)
    wao_v0 *= grids.weights[:, None]

    mask = numpy.any(wao_v0 > gthrd, axis=1) | numpy.any(wao_v0 < -gthrd,
                                                         axis=1)
    grids.coords = grids.coords[mask]
    grids.weights = grids.weights[mask]
    logger.debug(mol, 'threshold for grids screening %g', gthrd)
    logger.debug(mol, 'number of grids %d', grids.weights.size)
    logger.timer_debug1(mol, "Xg screening", *Ktime)
    return grids
Ejemplo n.º 15
0
    def build(self, j_only=None, with_j3c=True, kpts_band=None):
        '''Prepare the auxiliary cell and the list of k-point pairs, then
        (optionally) produce the 3-index integrals via ``self._make_j3c``.

        Args:
            j_only: None means "use ``self._j_only``". A true value lists
                only (k, k) pairs; otherwise pairs (k_i, k_j) with j <= i
                plus the band k-point pairs are listed.
            with_j3c: if false, return right after the pair list is formed.
            kpts_band: additional band k-points, reshaped to (-1, 3) and
                merged into ``self.kpts_band``.

        Returns:
            ``self``.
        '''
        if self.kpts_band is not None:
            self.kpts_band = numpy.reshape(self.kpts_band, (-1,3))
        if kpts_band is not None:
            requested = numpy.reshape(kpts_band, (-1,3))
            if self.kpts_band is not None:
                stacked = numpy.vstack((self.kpts_band,requested))
                self.kpts_band = unique(stacked)[0]
            else:
                self.kpts_band = requested

        self.check_sanity()
        self.dump_flags()

        self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                         self.exp_to_discard)

        # Drop repeated k-points: duplicates may make the buffer allocated in
        # incore.wrap_int3c bigger than needed, and the integral code fills
        # only the required part, leaving the remainder unfilled.
        keep_idx = unique(self.kpts)[1]
        kpts = numpy.asarray(self.kpts)[keep_idx]
        if self.kpts_band is not None:
            kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
        else:
            kband_uniq = numpy.zeros((0,3))

        if j_only is None:
            j_only = self._j_only
        if j_only:
            kall = numpy.vstack([kpts,kband_uniq])
            kptij_lst = numpy.hstack((kall,kall)).reshape(-1,2,3)
        else:
            pairs = []
            for i, ki in enumerate(kpts):
                for j in range(i+1):
                    pairs.append((ki, kpts[j]))
            for kb in kband_uniq:
                for kj in kpts:
                    pairs.append((kb, kj))
            for kb in kband_uniq:
                pairs.append((kb, kb))
            kptij_lst = numpy.asarray(pairs)

        if not with_j3c:
            return self

        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            same_existing_file = self._cderi == cderi and os.path.isfile(cderi)
            if same_existing_file:
                message = ('DF integrals in %s (specified by '
                           '._cderi) is overwritten by GDF '
                           'initialization. ')
            else:
                message = ('Value of ._cderi is ignored. '
                           'DF integrals will be saved in file %s .')
            logger.warn(self, message, cderi)
        self._cderi = cderi

        started = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        logger.timer_debug1(self, 'j3c', *started)
        return self
Ejemplo n.º 16
0
    def make_ea(self):
        '''Build the EA intermediates Wvovv, Wvvvv and Wvvvo (plus Woooo).

        Each ``imd`` function is called with ``self._cc``, the amplitudes,
        ``self.eris`` and ``self.kconserv``. Calls ``self._make_shared()``
        first when ``self._made_shared`` is false. Sets
        ``self.made_ea_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        kconserv = self.kconserv
        t1, t2, eris = self.t1, self.t2, self.eris

        # FIXME DELETE WOOOO
        # 0 or 1 virtuals
        self.Woooo = imd.Woooo(self._cc, t1, t2, eris, kconserv)
        # 3 or 4 virtuals
        self.Wvovv = imd.Wvovv(self._cc, t1, t2, eris, kconserv)
        self.Wvvvv = imd.Wvvvv(self._cc, t1, t2, eris, kconserv)
        self.Wvvvo = imd.Wvvvo(self._cc, t1, t2, eris, kconserv)

        self.made_ea_imds = True
        logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *cput0)
        return self
Ejemplo n.º 17
0
    def make_ea(self):
        '''Build the EA intermediates Wvovv, Wvvvv and Wvvvo (plus Woooo).

        Each ``imd`` function is called with ``self._cc``, the amplitudes,
        ``self.eris`` and ``self.kconserv``. Calls ``self._make_shared()``
        first when ``self._made_shared`` is false. Sets
        ``self.made_ea_imds`` and returns ``self``.
        '''
        if not self._made_shared:
            self._make_shared()

        # time.clock() was removed in Python 3.8; use the logger's clocks.
        cput0 = (logger.process_clock(), logger.perf_counter())

        kconserv = self.kconserv
        t1, t2, eris = self.t1, self.t2, self.eris

        # FIXME DELETE WOOOO
        # 0 or 1 virtuals
        self.Woooo = imd.Woooo(self._cc, t1, t2, eris, kconserv)
        # 3 or 4 virtuals
        self.Wvovv = imd.Wvovv(self._cc, t1, t2, eris, kconserv)
        self.Wvvvv = imd.Wvvvv(self._cc, t1, t2, eris, kconserv)
        self.Wvvvo = imd.Wvvvo(self._cc, t1, t2, eris, kconserv)

        self.made_ea_imds = True
        logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *cput0)
        return self
Ejemplo n.º 18
0
Archivo: df.py Proyecto: chrinide/pyscf
    def build(self, j_only=None, with_j3c=True, kpts_band=None):
        '''Set up the auxiliary cell and k-point pair list, and optionally
        generate the 3-index integrals through ``self._make_j3c``.

        Args:
            j_only: when None, ``self._j_only`` is used. If true, only
                diagonal (k, k) pairs are listed; otherwise the pairs
                (k_i, k_j) with j <= i plus the band k-point pairs.
            with_j3c: when true, resolve the output file name from
                ``self._cderi_to_save`` and call ``self._make_j3c``.
            kpts_band: extra band k-points, reshaped to (-1, 3) and merged
                into ``self.kpts_band``.

        Returns:
            ``self``.
        '''
        if self.kpts_band is not None:
            self.kpts_band = numpy.reshape(self.kpts_band, (-1,3))
        if kpts_band is not None:
            kpts_band = numpy.reshape(kpts_band, (-1,3))
            if self.kpts_band is None:
                self.kpts_band = kpts_band
            else:
                self.kpts_band = unique(numpy.vstack((self.kpts_band,kpts_band)))[0]

        self.check_sanity()
        self.dump_flags()

        self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                         self.exp_to_discard)

        kpts = self.kpts
        if self.kpts_band is None:
            kband_uniq = numpy.zeros((0,3))
        else:
            # Keep only the band k-points that are not already in kpts.
            kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
        if j_only is None:
            j_only = self._j_only
        if j_only:
            kall = numpy.vstack([kpts,kband_uniq])
            kptij_lst = numpy.hstack((kall,kall)).reshape(-1,2,3)
        else:
            kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts) for j in range(i+1)]
            kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
            kptij_lst.extend([(ki, ki) for ki in kband_uniq])
            kptij_lst = numpy.asarray(kptij_lst)

        if with_j3c:
            if isinstance(self._cderi_to_save, str):
                cderi = self._cderi_to_save
            else:
                cderi = self._cderi_to_save.name
            if isinstance(self._cderi, str):
                if self._cderi == cderi and os.path.isfile(cderi):
                    logger.warn(self, 'DF integrals in %s (specified by '
                                '._cderi) is overwritten by GDF '
                                'initialization. ', cderi)
                else:
                    logger.warn(self, 'Value of ._cderi is ignored. '
                                'DF integrals will be saved in file %s .',
                                cderi)
            self._cderi = cderi
            # time.clock() was removed in Python 3.8; use the logger's clocks.
            t1 = (logger.process_clock(), logger.perf_counter())
            self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
            t1 = logger.timer_debug1(self, 'j3c', *t1)
        return self
Ejemplo n.º 19
0
def get_gridss(mol, level=1, gthrd=1e-10):
    '''Build a DFT grid for ``mol`` and screen out negligible points.

    A grid point is kept when at least one weighted AO value
    (``GTOval`` times the grid weight) exceeds ``gthrd`` in magnitude.
    The AO values are evaluated in slices of at most 10000 grid points.

    Args:
        mol: molecule object; must provide ``eval_gto``.
        level: value assigned to ``grids.level`` before ``grids.build()``.
        gthrd: screening threshold on the weighted AO values.

    Returns:
        The ``Grids`` object with ``coords`` and ``weights`` filtered.
    '''
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    Ktime = (logger.process_clock(), logger.perf_counter())
    grids = dft.gen_grid.Grids(mol)
    grids.level = level
    grids.build()

    ngrids = grids.weights.size
    mask = []
    for p0, p1 in lib.prange(0, ngrids, 10000):
        wao_v0 = mol.eval_gto('GTOval', grids.coords[p0:p1])
        wao_v0 *= grids.weights[p0:p1,None]
        mask.append(numpy.any(wao_v0>gthrd, axis=1) |
                    numpy.any(wao_v0<-gthrd, axis=1))

    mask = numpy.hstack(mask)
    grids.coords = grids.coords[mask]
    grids.weights = grids.weights[mask]
    logger.debug(mol, 'threshold for grids screening %g', gthrd)
    logger.debug(mol, 'number of grids %d', grids.weights.size)
    logger.timer_debug1(mol, "Xg screening", *Ktime)
    return grids
Ejemplo n.º 20
0
def get_pp(mydf, kpts=None):
    '''Get the periodic pseudotential nuc-el AO matrix, with G=0 removed.

    The result is the sum of ``get_pp_loc_part1``,
    ``pseudo.pp_int.get_pp_loc_part2`` and ``pseudo.pp_int.get_pp_nl``
    for each k-point.

    Args:
        mydf: object providing ``cell``; also passed to the timers.
        kpts: None (a single zero k-point is used), one k-point of
            shape (3,), or anything reshapable to (-1, 3).

    Returns:
        The per-k-point result; only its first entry when ``kpts`` is
        None or has shape (3,).
    '''
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t0 = (logger.process_clock(), logger.perf_counter())
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))
    nkpts = len(kpts_lst)

    vloc1 = get_pp_loc_part1(mydf, kpts_lst)
    t1 = logger.timer_debug1(mydf, 'get_pp_loc_part1', *t0)
    vloc2 = pseudo.pp_int.get_pp_loc_part2(cell, kpts_lst)
    t1 = logger.timer_debug1(mydf, 'get_pp_loc_part2', *t1)
    vpp = pseudo.pp_int.get_pp_nl(cell, kpts_lst)
    for k in range(nkpts):
        vpp[k] += vloc1[k] + vloc2[k]
    t1 = logger.timer_debug1(mydf, 'get_pp_nl', *t1)

    if kpts is None or numpy.shape(kpts) == (3, ):
        vpp = vpp[0]
    logger.timer(mydf, 'get_pp', *t0)
    return vpp
Ejemplo n.º 21
0
Archivo: df.py Proyecto: pulkin/pyscf
    def build(self, j_only=None, with_j3c=True, kpts_band=None):
        '''Set up the auxiliary cell and k-point pair list, and optionally
        generate the 3-index integrals through ``self._make_j3c``.

        Args:
            j_only: when None, ``self._j_only`` is used. If true, only
                diagonal (k, k) pairs are listed; otherwise the pairs
                (k_i, k_j) with j <= i plus the band k-point pairs.
            with_j3c: when true, call ``self._make_j3c``.
            kpts_band: extra band k-points, reshaped to (-1, 3) and merged
                into ``self.kpts_band``.

        Returns:
            ``self``.
        '''
        if self.kpts_band is not None:
            self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
        if kpts_band is not None:
            kpts_band = numpy.reshape(kpts_band, (-1, 3))
            if self.kpts_band is None:
                self.kpts_band = kpts_band
            else:
                self.kpts_band = unique(
                    numpy.vstack((self.kpts_band, kpts_band)))[0]

        self.dump_flags()

        self.auxcell = make_modrho_basis(self.cell, self.auxbasis, self.eta)

        kpts = self.kpts
        if self.kpts_band is None:
            kband_uniq = numpy.zeros((0, 3))
        else:
            # Keep only the band k-points that are not already in kpts.
            kband_uniq = [
                k for k in self.kpts_band if len(member(k, kpts)) == 0
            ]
        if j_only is None:
            j_only = self._j_only
        if j_only:
            kall = numpy.vstack([kpts, kband_uniq])
            kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
        else:
            kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts)
                         for j in range(i + 1)]
            kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
            kptij_lst.extend([(ki, ki) for ki in kband_uniq])
            kptij_lst = numpy.asarray(kptij_lst)

        # When _cderi is not already a string, take it from _cderi_file
        # (either the string itself or its .name attribute).
        if not isinstance(self._cderi, str):
            if isinstance(self._cderi_file, str):
                self._cderi = self._cderi_file
            else:
                self._cderi = self._cderi_file.name

        if with_j3c:
            # time.clock() was removed in Python 3.8; use the logger's clocks.
            t1 = (logger.process_clock(), logger.perf_counter())
            self._make_j3c(self.cell, self.auxcell, kptij_lst)
            t1 = logger.timer_debug1(self, 'j3c', *t1)
        return self
Ejemplo n.º 22
0
    def build(self, j_only=False, with_j3c=True, kpts_band=None):
        '''Set up the auxiliary cell and k-point pair list, and optionally
        generate the 3-index integrals through ``self._make_j3c``.

        Args:
            j_only: stored on ``self._j_only``. If true, only diagonal
                (k, k) pairs are listed; otherwise the pairs (k_i, k_j)
                with j <= i.
            with_j3c: when true, call ``self._make_j3c``.
            kpts_band: extra band k-points, reshaped to (-1, 3) and merged
                into ``self.kpts_band``.

        Returns:
            ``self``.
        '''
        if self.kpts_band is not None:
            self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
        if kpts_band is not None:
            kpts_band = numpy.reshape(kpts_band, (-1, 3))
            if self.kpts_band is None:
                self.kpts_band = kpts_band
            else:
                self.kpts_band = unique(
                    numpy.vstack((self.kpts_band, kpts_band)))[0]

        self.dump_flags()

        self.auxcell = make_modrho_basis(self.cell, self.auxbasis, self.eta)

        # The pair list is built over the first output of unique() applied
        # to kpts, stacked with the band k-points when those are present.
        if self.kpts_band is None:
            kpts = unique(self.kpts)[0]
        else:
            kpts = unique(numpy.vstack((self.kpts, self.kpts_band)))[0]
        self._j_only = j_only
        if j_only:
            kptij_lst = numpy.hstack((kpts, kpts)).reshape(-1, 2, 3)
        else:
            kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts)
                         for j in range(i + 1)]
            kptij_lst = numpy.asarray(kptij_lst)

        # When _cderi is not already a string, take it from _cderi_file
        # (either the string itself or its .name attribute).
        if not isinstance(self._cderi, str):
            if isinstance(self._cderi_file, str):
                self._cderi = self._cderi_file
            else:
                self._cderi = self._cderi_file.name

        if with_j3c:
            # time.clock() was removed in Python 3.8; use the logger's clocks.
            t1 = (logger.process_clock(), logger.perf_counter())
            self._make_j3c(self.cell, self.auxcell, kptij_lst)
            t1 = logger.timer_debug1(self, 'j3c', *t1)
        return self
Ejemplo n.º 23
0
def _add_vvVV(mycc, t1, t2ab, eris, out=None):
    '''Ht2 = np.einsum('iJcD,acBD->iJaB', t2ab, vvVV)
    without using symmetry in t2ab or Ht2

    Args:
        mycc: CC object; ``direct``, ``stdout`` and ``verbose`` are read,
            and ``mo_coeff``/``get_frozen_mask`` when ``eris`` carries no
            ``mo_coeff``.
        t1: when not None, ``t2ab`` is replaced by
            ``make_tau_ab(t2ab, t1, t1)`` before the contraction.
        t2ab: 4D array of shape (nocca, noccb, nvira, nvirb).
        eris: integral object providing ``_contract_vvVV_t2``.
        out: forwarded to ``eris._contract_vvVV_t2``.

    Returns:
        The contracted array; zeros shaped like ``t2ab`` if it is empty.
    '''
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    time0 = logger.process_clock(), logger.perf_counter()
    if t2ab.size == 0:
        return np.zeros_like(t2ab)
    if t1 is not None:
        t2ab = make_tau_ab(t2ab, t1, t1)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocca, noccb, nvira, nvirb = t2ab.shape

    if mycc.direct:  # AO direct CCSD
        if getattr(eris, 'mo_coeff', None) is not None:
            mo_a, mo_b = eris.mo_coeff
        else:
            moidxa, moidxb = mycc.get_frozen_mask()
            mo_a = mycc.mo_coeff[0][:, moidxa]
            mo_b = mycc.mo_coeff[1][:, moidxb]
        # Note tensor t2ab may be t2bbab from eom_uccsd code.  In that
        # particular case, nocca, noccb do not equal to the actual number of
        # alpha/beta occupied orbitals. orbva and orbvb cannot be indexed as
        # mo_a[:,nocca:] and mo_b[:,noccb:]
        orbva = mo_a[:, -nvira:]
        orbvb = mo_b[:, -nvirb:]
        # Transform both virtual indices of t2ab with orbva/orbvb.
        tau = lib.einsum('ijab,pa->ijpb', t2ab, orbva)
        tau = lib.einsum('ijab,pb->ijap', tau, orbvb)
        time0 = logger.timer_debug1(mycc, 'vvvv-tau mo2ao', *time0)
        buf = eris._contract_vvVV_t2(mycc, tau, mycc.direct, out, log)
        mo = np.asarray(np.hstack((orbva, orbvb)), order='F')
        Ht2 = _ao2mo.nr_e2(buf.reshape(nocca * noccb, -1), mo.conj(),
                           (0, nvira, nvira, nvira + nvirb), 's1', 's1')
        return Ht2.reshape(t2ab.shape)
    else:
        return eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, out, log)
Ejemplo n.º 24
0
def _add_vvVV(mycc, t1, t2ab, eris, out=None):
    '''Ht2 = np.einsum('iJcD,acBD->iJaB', t2ab, vvVV)
    without using symmetry in t2ab or Ht2

    Args:
        mycc: CC object; ``direct``, ``stdout`` and ``verbose`` are read,
            and ``mo_coeff``/``get_frozen_mask`` when ``eris`` carries no
            ``mo_coeff``.
        t1: when not None, ``t2ab`` is replaced by
            ``make_tau_ab(t2ab, t1, t1)`` before the contraction.
        t2ab: 4D array of shape (nocca, noccb, nvira, nvirb).
        eris: integral object providing ``_contract_vvVV_t2``.
        out: forwarded to ``eris._contract_vvVV_t2``.

    Returns:
        The contracted array; zeros shaped like ``t2ab`` if it is empty.
    '''
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    time0 = logger.process_clock(), logger.perf_counter()
    if t2ab.size == 0:
        return np.zeros_like(t2ab)
    if t1 is not None:
        t2ab = make_tau_ab(t2ab, t1, t1)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocca, noccb, nvira, nvirb = t2ab.shape

    if mycc.direct:  # AO direct CCSD
        if getattr(eris, 'mo_coeff', None) is not None:
            mo_a, mo_b = eris.mo_coeff
        else:
            moidxa, moidxb = mycc.get_frozen_mask()
            mo_a = mycc.mo_coeff[0][:,moidxa]
            mo_b = mycc.mo_coeff[1][:,moidxb]
        # Note tensor t2ab may be t2bbab from eom_uccsd code.  In that
        # particular case, nocca, noccb do not equal to the actual number of
        # alpha/beta occupied orbitals. orbva and orbvb cannot be indexed as
        # mo_a[:,nocca:] and mo_b[:,noccb:]
        orbva = mo_a[:,-nvira:]
        orbvb = mo_b[:,-nvirb:]
        # Transform both virtual indices of t2ab with orbva/orbvb.
        tau = lib.einsum('ijab,pa->ijpb', t2ab, orbva)
        tau = lib.einsum('ijab,pb->ijap', tau, orbvb)
        time0 = logger.timer_debug1(mycc, 'vvvv-tau mo2ao', *time0)
        buf = eris._contract_vvVV_t2(mycc, tau, mycc.direct, out, log)
        mo = np.asarray(np.hstack((orbva, orbvb)), order='F')
        Ht2 = _ao2mo.nr_e2(buf.reshape(nocca*noccb,-1), mo.conj(),
                           (0,nvira,nvira,nvira+nvirb), 's1', 's1')
        return Ht2.reshape(t2ab.shape)
    else:
        return eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, out, log)
Ejemplo n.º 25
0
    def add_wvvVV_(self, t2, eris, t2new_tril):
        '''Add the vvvv contraction of t2 to ``t2new_tril``.

        Reference form (see the einsum comments in the body):
        ``t2new += einsum('ijcd,acdb->ijab', tau, vvvv)``, where tau holds
        the j <= i occupied pairs of ``t2`` packed along its first axis.

        When ``self.direct`` is true the integrals are generated in the AO
        basis shell-block by shell-block; otherwise ``eris.vvvv`` is read
        row-wise and each slab is contracted through ``async_do``.

        Args:
            t2: array whose ``shape[1:3]`` is (nocc, nvir); indexed as
                ``t2[i, :i + 1]``.
            eris: object exposing ``vvvv`` (used only when not direct).
            t2new_tril: accumulation target.

        Returns:
            ``t2new_tril``.
        '''
        # time.clock() was removed in Python 3.8; use the logger's clocks.
        time0 = logger.process_clock(), logger.perf_counter()
        nocc, nvir = t2.shape[1:3]

        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
            # Off-diagonal integral block [i0:i1, j0:j1]: contributes twice,
            # once as given and once transposed.
            nao = tau.shape[-1]
            ic = i1 - i0
            jc = j1 - j0
            #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
            _dgemm('N', 'N',
                   nocc * (nocc + 1) // 2, jc * nao, ic * nao,
                   tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
                   t2new_tril.reshape(-1, nao * nao), 1, 1, i0 * nao, 0,
                   j0 * nao)

            #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
            _dgemm('N', 'T',
                   nocc * (nocc + 1) // 2, ic * nao, jc * nao,
                   tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
                   t2new_tril.reshape(-1, nao * nao), 1, 1, j0 * nao, 0,
                   i0 * nao)

        def contract_tril_(t2new_tril, tau, eri, a0, a):
            # Lower-triangular slab for index ``a`` with rows a0..a.
            nvir = tau.shape[-1]
            #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
            _dgemm('N', 'N',
                   nocc * (nocc + 1) // 2, nvir, (a + 1 - a0) * nvir,
                   tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
                   t2new_tril.reshape(-1, nvir * nvir), 1, 1, a0 * nvir, 0,
                   a * nvir)

            #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
            if a > a0:
                _dgemm('N', 'T',
                       nocc * (nocc + 1) // 2, (a - a0) * nvir, nvir,
                       tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
                       t2new_tril.reshape(-1, nvir * nvir), 1, 1, a * nvir, 0,
                       a0 * nvir)

        if self.direct:  # AO-direct CCSD
            mol = self.mol
            nao, nmo = self.mo_coeff.shape
            nao_pair = nao * (nao + 1) // 2
            aos = numpy.asarray(self.mo_coeff[:, nocc:].T, order='F')
            outbuf = numpy.empty((nocc * (nocc + 1) // 2, nao, nao))
            # tau initially shares memory with outbuf.
            tau = numpy.ndarray((nocc * (nocc + 1) // 2, nvir, nvir),
                                buffer=outbuf)
            p0 = 0
            for i in range(nocc):
                tau[p0:p0 + i + 1] = t2[i, :i + 1]
                p0 += i + 1
            tau = _ao2mo.nr_e2(tau.reshape(-1, nvir**2), aos, (0, nao, 0, nao),
                               's1', 's1')
            tau = tau.reshape(-1, nao, nao)
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

            ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
            # From here on outbuf is the AO-basis accumulator.
            outbuf[:] = 0
            ao_loc = mol.ao_loc_nr()
            max_memory = max(0, self.max_memory - lib.current_memory()[0])
            dmax = max(4, int(numpy.sqrt(max_memory * .95e6 / 8 / nao**2 / 2)))
            sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
            dmax = max(x[2] for x in sh_ranges)
            eribuf = numpy.empty((dmax, dmax, nao, nao))
            loadbuf = numpy.empty((dmax, dmax, nao, nao))
            fint = gto.moleintor.getints2e

            for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
                # Off-diagonal shell blocks (j-range before the i-range).
                for jsh0, jsh1, nj in sh_ranges[:ip]:
                    eri = fint('cint2e_sph',
                               mol._atm,
                               mol._bas,
                               mol._env,
                               shls_slice=(ish0, ish1, jsh0, jsh1),
                               aosym='s2kl',
                               ao_loc=ao_loc,
                               cintopt=ao2mopt._cintopt,
                               out=eribuf)
                    i0, i1 = ao_loc[ish0], ao_loc[ish1]
                    j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                    tmp = numpy.ndarray((i1 - i0, nao, j1 - j0, nao),
                                        buffer=loadbuf)
                    _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                           eri.ctypes.data_as(ctypes.c_void_p),
                                           (ctypes.c_int * 4)(i0, i1, j0, j1),
                                           ctypes.c_int(nao))
                    contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                    time0 = logger.timer_debug1(
                        self,
                        'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                        *time0)
                # Diagonal shell block.
                eri = fint('cint2e_sph',
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(ish0, ish1, ish0, ish1),
                           aosym='s4',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                for i in range(i1 - i0):
                    p0, p1 = i * (i + 1) // 2, (i + 1) * (i + 2) // 2
                    tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                    contract_tril_(outbuf, tau, tmp, i0, i0 + i)
                time0 = logger.timer_debug1(
                    self, 'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, ish0, ish1),
                    *time0)
            # Drop the references to the large integral buffers.
            eribuf = loadbuf = eri = tmp = None

            mo = numpy.asarray(self.mo_coeff, order='F')
            tmp = _ao2mo.nr_e2(outbuf,
                               mo, (nocc, nmo, nocc, nmo),
                               's1',
                               's1',
                               out=tau)
            t2new_tril += tmp.reshape(-1, nvir, nvir)

        else:
            tau = numpy.empty((nocc * (nocc + 1) // 2, nvir, nvir))
            p0 = 0
            for i in range(nocc):
                tau[p0:p0 + i + 1] = t2[i, :i + 1]
                p0 += i + 1
            p0 = 0
            # Two buffers are alternated so the next slab can be unpacked
            # while the previous one is still being handled by async_do.
            outbuf = numpy.empty((nvir, nvir, nvir))
            outbuf1 = numpy.empty((nvir, nvir, nvir))
            handler = None
            for a in range(nvir):
                buf = lib.unpack_tril(eris.vvvv[p0:p0 + a + 1], out=outbuf)
                outbuf, outbuf1 = outbuf1, outbuf
                handler = async_do(handler, contract_tril_, t2new_tril, tau,
                                   buf, 0, a)
                p0 += a + 1
                time0 = logger.timer_debug1(self, 'vvvv %d' % a, *time0)
            # With nvir == 0 the loop never runs and no handler is created.
            if handler is not None:
                handler.join()
        return t2new_tril
Ejemplo n.º 26
0
def get_j(dfobj, dm, hermi=1, direct_scf_tol=1e-13):
    """Build the density-fitted Coulomb matrix with a two-pass direct scheme.

    Pass 1 contracts the 3-centre integrals with ``dm`` to get the density
    projected on the auxiliary basis, the 2-centre auxiliary metric is then
    solved for the fitting coefficients, and pass 2 contracts the 3-centre
    integrals with those coefficients.

    Args:
        dfobj: object providing ``mol``, ``auxbasis`` and the ``_vjopt``
            cache.  On the first call (``_vjopt is None``) this function
            sets ``dfobj.auxmol`` and ``dfobj._vjopt``.
        dm: one density matrix or a stack of them; it is viewed as
            ``(-1, nao, nao)`` with ``nao`` taken from the last axis.
        hermi: accepted for interface compatibility; it is not read here.
        direct_scf_tol: screening threshold stored on the prescreening
            object when it is first built.  Later calls reuse the cached
            object, so a different value passed then has no effect.

    Returns:
        The Coulomb matrices as an ndarray with the same shape as ``dm``.
    """
    from pyscf.scf import _vhf
    from pyscf.scf import jk
    from pyscf.df import addons
    # time.clock() was removed in Python 3.8.  Prefer the logger's own clocks
    # when it exposes them, so the start marks share a time base with
    # logger.timer*; otherwise fall back to the legacy clock pair.
    cpu_clock = (getattr(logger, 'process_clock', None)
                 or getattr(time, 'clock', None) or time.process_time)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t0 = t1 = (cpu_clock(), wall_clock())

    mol = dfobj.mol
    if dfobj._vjopt is None:
        # One-time setup: auxiliary molecule, screening data and the
        # factorized 2-centre metric are cached on dfobj._vjopt.
        dfobj.auxmol = auxmol = addons.make_auxmol(mol, dfobj.auxbasis)
        opt = _vhf.VHFOpt(mol, 'int3c2e', 'CVHFnr3c2e_schwarz_cond')
        opt.direct_scf_tol = direct_scf_tol

        # q_cond part 1: the regular int2e (ij|ij) for mol's basis
        opt.init_cvhf_direct(mol, 'int2e', 'CVHFsetnr_direct_scf')
        mol_q_cond = lib.frompointer(opt._this.contents.q_cond, mol.nbas**2)

        # Update q_cond to include the 2e-integrals (auxmol|auxmol)
        j2c = auxmol.intor('int2c2e', hermi=1)
        j2c_diag = numpy.sqrt(abs(j2c.diagonal()))
        aux_loc = auxmol.ao_loc
        # One value per auxiliary shell: the largest sqrt|diagonal| entry
        # among the functions of that shell.
        aux_q_cond = [
            j2c_diag[i0:i1].max() for i0, i1 in zip(aux_loc[:-1], aux_loc[1:])
        ]
        q_cond = numpy.hstack((mol_q_cond, aux_q_cond))
        fsetqcond = _vhf.libcvhf.CVHFset_q_cond
        fsetqcond(opt._this, q_cond.ctypes.data_as(ctypes.c_void_p),
                  ctypes.c_int(q_cond.size))

        # Try a Cholesky factorization of the metric; if it fails keep the
        # plain matrix and remember which solver has to be used later.
        try:
            opt.j2c = j2c = scipy.linalg.cho_factor(j2c, lower=True)
            opt.j2c_type = 'cd'
        except scipy.linalg.LinAlgError:
            opt.j2c = j2c
            opt.j2c_type = 'regular'

        # jk.get_jk function supports 4-index integrals. Use bas_placeholder
        # (l=0, nctr=1, 1 function) to hold the last index.
        bas_placeholder = numpy.array([0, 0, 1, 1, 0, 0, 0, 0],
                                      dtype=numpy.int32)
        fakemol = mol + auxmol
        fakemol._bas = numpy.vstack((fakemol._bas, bas_placeholder))
        opt.fakemol = fakemol
        dfobj._vjopt = opt
        t1 = logger.timer_debug1(dfobj, 'df-vj init_direct_scf', *t1)

    opt = dfobj._vjopt
    fakemol = opt.fakemol
    dm = numpy.asarray(dm, order='C')
    dm_shape = dm.shape
    nao = dm_shape[-1]
    dm = dm.reshape(-1, nao, nao)
    n_dm = dm.shape[0]

    # First compute the density in auxiliary basis
    # j3c = fauxe2(mol, auxmol)
    # jaux = numpy.einsum('ijk,ji->k', j3c, dm)
    # rho = numpy.linalg.solve(auxmol.intor('int2c2e'), jaux)
    nbas = mol.nbas
    nbas1 = mol.nbas + dfobj.auxmol.nbas
    # Shell ranges in fakemol: (mol, mol, auxmol, placeholder shell).
    shls_slice = (0, nbas, 0, nbas, nbas, nbas1, nbas1, nbas1 + 1)
    with lib.temporary_env(opt,
                           prescreen='CVHFnr3c2e_vj_pass1_prescreen',
                           _dmcondname='CVHFsetnr_direct_scf_dm'):
        jaux = jk.get_jk(fakemol,
                         dm, ['ijkl,ji->kl'] * n_dm,
                         'int3c2e',
                         aosym='s2ij',
                         hermi=0,
                         shls_slice=shls_slice,
                         vhfopt=opt)
    # remove the index corresponding to bas_placeholder
    jaux = numpy.array(jaux)[:, :, 0]
    t1 = logger.timer_debug1(dfobj, 'df-vj pass 1', *t1)

    # Solve metric * rho = jaux with whichever form of the metric was cached.
    if opt.j2c_type == 'cd':
        rho = scipy.linalg.cho_solve(opt.j2c, jaux.T)
    else:
        rho = scipy.linalg.solve(opt.j2c, jaux.T)
    # transform rho to shape (:,1,naux), to adapt to 3c2e integrals (ij|k)
    rho = rho.T[:, numpy.newaxis, :]
    t1 = logger.timer_debug1(dfobj, 'df-vj solve ', *t1)

    # Next compute the Coulomb matrix
    # j3c = fauxe2(mol, auxmol)
    # vj = numpy.einsum('ijk,k->ij', j3c, rho)
    with lib.temporary_env(opt,
                           prescreen='CVHFnr3c2e_vj_pass2_prescreen',
                           _dmcondname=None):
        # CVHFnr3c2e_vj_pass2_prescreen requires custom dm_cond
        aux_loc = dfobj.auxmol.ao_loc
        # One value per auxiliary shell: the largest |rho| over all density
        # matrices and over the functions of that shell.
        dm_cond = [
            abs(rho[:, :, i0:i1]).max()
            for i0, i1 in zip(aux_loc[:-1], aux_loc[1:])
        ]
        dm_cond = numpy.array(dm_cond)
        fsetcond = _vhf.libcvhf.CVHFset_dm_cond
        fsetcond(opt._this, dm_cond.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(dm_cond.size))

        vj = jk.get_jk(fakemol,
                       rho, ['ijkl,lk->ij'] * n_dm,
                       'int3c2e',
                       aosym='s2ij',
                       hermi=1,
                       shls_slice=shls_slice,
                       vhfopt=opt)

    t1 = logger.timer_debug1(dfobj, 'df-vj pass 2', *t1)
    logger.timer(dfobj, 'df-vj', *t0)
    return numpy.asarray(vj).reshape(dm_shape)
Ejemplo n.º 27
0
Archivo: cisd.py Proyecto: eronca/pyscf
    def add_wvvVV_(self, t2, eris, t2new_tril):
        """Accumulate the t2 * (vv|vv) contraction into ``t2new_tril``.

        Implements ``t2new += einsum('ijcd,acdb->ijab', tau, vvvv)`` with
        ``tau`` being ``t2`` packed over occupied pairs ``j <= i``.

        Args:
            t2: doubles amplitudes; ``nocc, nvir`` are read from
                ``t2.shape[1:3]``.
            eris: integral container; only ``eris.vvvv`` is read, and only
                when ``self.direct`` is false.
            t2new_tril: output array with ``nocc*(nocc+1)//2`` leading
                entries, updated in place.

        Returns:
            ``t2new_tril`` (the same object that was passed in).
        """
        # time.clock() was removed in Python 3.8.  Prefer the logger's own
        # clocks when it exposes them, so the start marks share a time base
        # with logger.timer*; otherwise fall back to the legacy clock pair.
        cpu_clock = (getattr(logger, 'process_clock', None)
                     or getattr(time, 'clock', None) or time.process_time)
        wall_clock = getattr(logger, 'perf_counter', None) or time.time
        time0 = cpu_clock(), wall_clock()
        nocc, nvir = t2.shape[1:3]

        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
            # Off-diagonal (rectangular) integral block [i0:i1, j0:j1]: it is
            # applied twice, once as stored and once transposed.
            nao = tau.shape[-1]
            ic = i1 - i0
            jc = j1 - j0
            #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, i0*nao, 0, j0*nao)

            #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
            _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, j0*nao, 0, i0*nao)

        def contract_tril_(t2new_tril, tau, eri, a0, a):
            # One row ``a`` of a lower-triangular integral block that starts
            # at index ``a0``.
            nvir = tau.shape[-1]
            #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
                   tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                   t2new_tril.reshape(-1,nvir*nvir), 1, 1, a0*nvir, 0, a*nvir)

            #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
            if a > a0:
                _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                       tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                       t2new_tril.reshape(-1,nvir*nvir), 1, 1, a*nvir, 0, a0*nvir)

        if self.direct:   # AO-direct CCSD
            mol = self.mol
            nao, nmo = self.mo_coeff.shape
            aos = numpy.asarray(self.mo_coeff[:,nocc:].T, order='F')
            outbuf = numpy.empty((nocc*(nocc+1)//2,nao,nao))
            # tau initially lives in outbuf's memory; nr_e2 below returns the
            # AO-basis tau, after which outbuf is zeroed and reused as the
            # AO-basis accumulator.
            tau = numpy.ndarray((nocc*(nocc+1)//2,nvir,nvir), buffer=outbuf)
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = t2[i,:i+1]
                p0 += i + 1
            tau = _ao2mo.nr_e2(tau.reshape(-1,nvir**2), aos, (0,nao,0,nao), 's1', 's1')
            tau = tau.reshape(-1,nao,nao)
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

            ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
            outbuf[:] = 0
            ao_loc = mol.ao_loc_nr()
            max_memory = max(0, self.max_memory - lib.current_memory()[0])
            # Block edge chosen so that the two (dmax,dmax,nao,nao) float64
            # buffers below fit in ~95% of the remaining memory (in MB).
            dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
            sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
            dmax = max(x[2] for x in sh_ranges)
            eribuf = numpy.empty((dmax,dmax,nao,nao))
            loadbuf = numpy.empty((dmax,dmax,nao,nao))
            fint = gto.moleintor.getints2e

            for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
                # Off-diagonal shell blocks (jsh < ish).
                for jsh0, jsh1, nj in sh_ranges[:ip]:
                    eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                               shls_slice=(ish0,ish1,jsh0,jsh1), aosym='s2kl',
                               ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                    i0, i1 = ao_loc[ish0], ao_loc[ish1]
                    j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                    tmp = numpy.ndarray((i1-i0,nao,j1-j0,nao), buffer=loadbuf)
                    _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                           eri.ctypes.data_as(ctypes.c_void_p),
                                           (ctypes.c_int*4)(i0, i1, j0, j1),
                                           ctypes.c_int(nao))
                    contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                    time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                                (ish0,ish1,jsh0,jsh1), *time0)
                # Diagonal shell block, handled row by row.
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0,ish1,ish0,ish1), aosym='s4',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                for i in range(i1-i0):
                    p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                    tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                    contract_tril_(outbuf, tau, tmp, i0, i0+i)
                time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                            (ish0,ish1,ish0,ish1), *time0)
            # Drop references to the large work buffers before the final
            # transformation.
            eribuf = loadbuf = eri = tmp = None

            mo = numpy.asarray(self.mo_coeff, order='F')
            tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,nocc,nmo), 's1', 's1', out=tau)
            t2new_tril += tmp.reshape(-1,nvir,nvir)

        else:
            tau = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = t2[i,:i+1]
                p0 += i + 1
            p0 = 0
            # Two unpack buffers are swapped every iteration so the next row
            # can be unpacked while the previous one is handed to async_do.
            outbuf = numpy.empty((nvir,nvir,nvir))
            outbuf1 = numpy.empty((nvir,nvir,nvir))
            handler = None
            for a in range(nvir):
                buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
                outbuf, outbuf1 = outbuf1, outbuf
                handler = async_do(handler, contract_tril_, t2new_tril, tau, buf, 0, a)
                p0 += a+1
                time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
            # With nvir == 0 the loop never runs and there is nothing to join.
            if handler is not None:
                handler.join()
        return t2new_tril
Ejemplo n.º 28
0
def get_t3p2_imds(mycc, t1, t2, eris=None, t3p2_ip_out=None, t3p2_ea_out=None):
    """Build the T3[2]-corrected amplitudes and W intermediates.

    For a description of arguments, see `get_t3p2_imds_slow` in
    the corresponding `kintermediates.py`.

    Notes on what this implementation does:

    * ``eris`` defaults to ``mycc.ao2mo()`` when not supplied.
    * ``t3p2_ip_out`` / ``t3p2_ea_out`` are allocated as zero arrays when
      ``None``; otherwise the caller's arrays are accumulated into in place
      and returned.

    Returns:
        A 5-tuple ``(delta_ccsd_energy, pt1, pt2, Wmcik, Wacek)`` where
        ``delta_ccsd_energy`` is ``mycc.energy(pt1, pt2, eris)`` minus the
        energy evaluated with the input ``t1``/``t2``, and ``Wmcik`` /
        ``Wacek`` are the same objects as ``t3p2_ip_out`` / ``t3p2_ea_out``.
    """
    from pyscf.pbc.cc.kccsd_t_rhf import _get_epqr
    cpu1 = cpu0 = (logger.process_clock(), logger.perf_counter())
    if eris is None:
        eris = mycc.ao2mo()
    fock = eris.fock
    nkpts, nocc, nvir = t1.shape
    cell = mycc._scf.cell
    kpts = mycc.kpts
    kconserv = mycc.khelper.kconserv
    dtype = np.result_type(t1, t2)

    # Occupied-virtual block of the Fock matrix at every k-point.
    fov = fock[:, :nocc, nocc:]
    #foo = np.asarray([fock[ikpt, :nocc, :nocc].diagonal() for ikpt in range(nkpts)])
    #fvv = np.asarray([fock[ikpt, nocc:, nocc:].diagonal() for ikpt in range(nkpts)])
    mo_energy_occ = np.array(
        [eris.mo_energy[ki][:nocc] for ki in range(nkpts)])
    mo_energy_vir = np.array(
        [eris.mo_energy[ki][nocc:] for ki in range(nkpts)])

    mo_e_o = mo_energy_occ
    mo_e_v = mo_energy_vir

    # Reference energy with the unmodified amplitudes; subtracted from the
    # energy of the corrected amplitudes at the end.
    ccsd_energy = mycc.energy(t1, t2, eris)

    if t3p2_ip_out is None:
        t3p2_ip_out = np.zeros((nkpts, nkpts, nkpts, nocc, nvir, nocc, nocc),
                               dtype=dtype)
    # Alias: contributions below are accumulated directly into the output.
    Wmcik = t3p2_ip_out

    if t3p2_ea_out is None:
        t3p2_ea_out = np.zeros((nkpts, nkpts, nkpts, nvir, nvir, nvir, nocc),
                               dtype=dtype)
    # Alias: contributions below are accumulated directly into the output.
    Wacek = t3p2_ea_out

    # Create necessary temporary eris for fast read
    from pyscf.pbc.cc.kccsd_t_rhf import create_t3_eris, get_data_slices
    feri_tmp, t2T, eris_vvop, eris_vooo_C = create_t3_eris(
        mycc, kconserv, [eris.vovv, eris.oovv, eris.ooov, t2])
    #t1T = np.array([x.T for x in t1], dtype=np.complex, order='C')
    #fvo = np.array([x.T for x in fov], dtype=np.complex, order='C')
    cpu1 = logger.timer_debug1(mycc, 'CCSD(T) tmp eri creation', *cpu1)

    def get_w(ki, kj, kk, ka, kb, kc, a0, a1, b0, b1, c0, c1):
        '''Wijkabc intermediate as described in Scuseria paper before Pijkabc acts

        Function copied from `kccsd_t_rhf.py`.  The virtual indices are
        restricted to the blocks [a0:a1], [b0:b1], [c0:c1].'''
        km = kconserv[kc, kk, kb]
        kf = kconserv[kk, kc, kj]
        out = einsum('cfjk,abif->abcijk', t2T[kc, kf, kj, c0:c1, :, :, :],
                     eris_vvop[ka, kb, ki, a0:a1, b0:b1, :, nocc:])
        out = out - einsum('cbmk,aijm->abcijk', t2T[kc, kb, km, c0:c1,
                                                    b0:b1, :, :],
                           eris_vooo_C[ka, ki, kj, a0:a1, :, :, :])
        return out

    def get_permuted_w(ki, kj, kk, ka, kb, kc, orb_indices):
        '''Pijkabc operating on Wijkabc intermediate as described in Scuseria paper

        Function copied from `kccsd_t_rhf.py`.  Sums get_w over the six
        simultaneous permutations of (i,a), (j,b), (k,c), transposing each
        term back to the 'abcijk' index order.'''
        a0, a1, b0, b1, c0, c1 = orb_indices
        out = get_w(ki, kj, kk, ka, kb, kc, a0, a1, b0, b1, c0, c1)
        out = out + get_w(kj, kk, ki, kb, kc, ka, b0, b1, c0, c1, a0,
                          a1).transpose(2, 0, 1, 5, 3, 4)
        out = out + get_w(kk, ki, kj, kc, ka, kb, c0, c1, a0, a1, b0,
                          b1).transpose(1, 2, 0, 4, 5, 3)
        out = out + get_w(ki, kk, kj, ka, kc, kb, a0, a1, c0, c1, b0,
                          b1).transpose(0, 2, 1, 3, 5, 4)
        out = out + get_w(kk, kj, ki, kc, kb, ka, c0, c1, b0, b1, a0,
                          a1).transpose(2, 1, 0, 5, 4, 3)
        out = out + get_w(kj, ki, kk, kb, ka, kc, b0, b1, a0, a1, c0,
                          c1).transpose(1, 0, 2, 4, 3, 5)
        return out

    # NOTE(review): get_data is not called anywhere in this function (its
    # only call site below is commented out); it reads `task` from the
    # enclosing scope.
    def get_data(kpt_indices):
        idx_args = get_data_slices(kpt_indices, task, kconserv)
        vvop_indices, vooo_indices, t2T_vvop_indices, t2T_vooo_indices = idx_args
        vvop_data = [eris_vvop[tuple(x)] for x in vvop_indices]
        vooo_data = [eris_vooo_C[tuple(x)] for x in vooo_indices]
        t2T_vvop_data = [t2T[tuple(x)] for x in t2T_vvop_indices]
        t2T_vooo_data = [t2T[tuple(x)] for x in t2T_vooo_indices]
        data = [vvop_data, vooo_data, t2T_vvop_data, t2T_vooo_data]
        return data

    def add_and_permute(kpt_indices, orb_indices, data):
        '''Performs permutation and addition of t3 temporary arrays.

        Returns 2*W_ijk - W_jik (i<->j swapped back) - W_kji (i<->k swapped
        back); `kpt_indices` and `orb_indices` are unpacked but the values
        are not otherwise used.'''
        ki, kj, kk, ka, kb, kc = kpt_indices
        a0, a1, b0, b1, c0, c1 = orb_indices
        tmp_t3Tv_ijk = np.asarray(data[0], dtype=dtype, order='C')
        tmp_t3Tv_jik = np.asarray(data[1], dtype=dtype, order='C')
        tmp_t3Tv_kji = np.asarray(data[2], dtype=dtype, order='C')
        #out_ijk = np.empty(data[0].shape, dtype=dtype, order='C')

        #drv = _ccsd.libcc.MPICCadd_and_permute_t3T
        #drv(ctypes.c_int(nocc), ctypes.c_int(nvir),
        #    ctypes.c_int(0),
        #    out_ijk.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_ijk.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_jik.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_kji.ctypes.data_as(ctypes.c_void_p),
        #    mo_offset.ctypes.data_as(ctypes.c_void_p),
        #    slices.ctypes.data_as(ctypes.c_void_p))
        return (2. * tmp_t3Tv_ijk - tmp_t3Tv_jik.transpose(0, 1, 2, 4, 3, 5) -
                tmp_t3Tv_kji.transpose(0, 1, 2, 5, 4, 3))
        #return out_ijk

    # Get location of padded elements in occupied and virtual space
    nonzero_opadding, nonzero_vpadding = padding_k_idx(mycc, kind="split")

    mem_now = lib.current_memory()[0]
    max_memory = max(0, mycc.max_memory - mem_now)
    blkmin = 4
    # temporary t3 array is size:  nkpts**3 * blksize**3 * nocc**3 * 16
    vir_blksize = min(
        nvir,
        max(blkmin, int(
            (max_memory * .9e6 / 16 / nocc**3 / nkpts**3)**(1. / 3))))
    # Every (a, b, c) combination of virtual blocks becomes one task.
    tasks = []
    logger.debug(mycc, 'max_memory %d MB (%d MB in use)', max_memory, mem_now)
    logger.debug(mycc, 'virtual blksize = %d (nvir = %d)', vir_blksize, nvir)
    for a0, a1 in lib.prange(0, nvir, vir_blksize):
        for b0, b1 in lib.prange(0, nvir, vir_blksize):
            for c0, c1 in lib.prange(0, nvir, vir_blksize):
                tasks.append((a0, a1, b0, b1, c0, c1))

    # NOTE(review): eaa is built here but not read again in this function.
    eaa = []
    for ka in range(nkpts):
        eaa.append(mo_e_o[ka][:, None] - mo_e_v[ka][None, :])

    # Accumulators for the corrections to the singles and doubles amplitudes.
    pt1 = np.zeros((nkpts, nocc, nvir), dtype=dtype)
    pt2 = np.zeros((nkpts, nkpts, nkpts, nocc, nocc, nvir, nvir), dtype=dtype)
    for ka, kb in product(range(nkpts), repeat=2):
        for task_id, task in enumerate(tasks):
            cput2 = (logger.process_clock(), logger.perf_counter())
            a0, a1, b0, b1, c0, c1 = task
            my_permuted_w = np.zeros(
                (nkpts, ) * 3 + (a1 - a0, b1 - b0, c1 - c0) + (nocc, ) * 3,
                dtype=dtype)

            # First pass: fill the permuted W for every (ki, kj, kk).  This
            # has to finish before the second pass, which reads entries with
            # the occupied k-points swapped.
            for ki, kj, kk in product(range(nkpts), repeat=3):
                # Find momentum conservation condition for triples
                # amplitude t3ijkabc
                kc = kpts_helper.get_kconserv3(cell, kpts,
                                               [ki, kj, kk, ka, kb])

                kpt_indices = [ki, kj, kk, ka, kb, kc]
                #data = get_data(kpt_indices)
                my_permuted_w[ki, kj,
                              kk] = get_permuted_w(ki, kj, kk, ka, kb, kc,
                                                   task)

            # Second pass: form the t3 block, divide by the orbital-energy
            # denominator and contract it into pt1, pt2, Wmcik and Wacek.
            for ki, kj, kk in product(range(nkpts), repeat=3):
                # eigenvalue denominator: e(i) + e(j) + e(k)
                eijk = _get_epqr([0, nocc, ki, mo_e_o, nonzero_opadding],
                                 [0, nocc, kj, mo_e_o, nonzero_opadding],
                                 [0, nocc, kk, mo_e_o, nonzero_opadding])

                # Find momentum conservation condition for triples
                # amplitude t3ijkabc
                kc = kpts_helper.get_kconserv3(cell, kpts,
                                               [ki, kj, kk, ka, kb])
                eabc = _get_epqr([a0, a1, ka, mo_e_v, nonzero_vpadding],
                                 [b0, b1, kb, mo_e_v, nonzero_vpadding],
                                 [c0, c1, kc, mo_e_v, nonzero_vpadding],
                                 fac=[-1., -1., -1.])

                kpt_indices = [ki, kj, kk, ka, kb, kc]
                # Broadcast to the 'abcijk' layout of the t3 block.
                eabcijk = (eijk[None, None, None, :, :, :] +
                           eabc[:, :, :, None, None, None])

                tmp_t3Tv_ijk = my_permuted_w[ki, kj, kk]
                tmp_t3Tv_jik = my_permuted_w[kj, ki, kk]
                tmp_t3Tv_kji = my_permuted_w[kk, kj, ki]
                Ptmp_t3Tv = add_and_permute(
                    kpt_indices, task,
                    (tmp_t3Tv_ijk, tmp_t3Tv_jik, tmp_t3Tv_kji))
                Ptmp_t3Tv /= eabcijk

                # Contribution to T1 amplitudes
                if ki == ka and kc == kconserv[kj, kb, kk]:
                    eris_Soovv = (
                        2. * eris.oovv[kj, kk, kb, :, :, b0:b1, c0:c1] -
                        eris.oovv[kj, kk, kc, :, :, c0:c1, b0:b1].transpose(
                            0, 1, 3, 2))
                    pt1[ka, :, a0:a1] += 0.5 * einsum('abcijk,jkbc->ia',
                                                      Ptmp_t3Tv, eris_Soovv)

                # Contribution to T2 amplitudes
                if ki == ka and kc == kconserv[kj, kb, kk]:
                    tmp = einsum('abcijk,ia->jkbc', Ptmp_t3Tv,
                                 0.5 * fov[ki, :, a0:a1])
                    _add_pt2(pt2, nkpts, kconserv, [kj, kk, kb],
                             [None, None, (b0, b1), (c0, c1)], tmp)

                kd = kconserv[ka, ki, kb]
                eris_vovv = eris.vovv[kd, ki, kb, :, :, b0:b1, a0:a1]
                tmp = einsum('abcijk,diba->jkdc', Ptmp_t3Tv, eris_vovv)
                _add_pt2(pt2, nkpts, kconserv, [kj, kk, kd],
                         [None, None, None, (c0, c1)], tmp)

                km = kconserv[kc, kk, kb]
                eris_ooov = eris.ooov[kj, ki, km, :, :, :, a0:a1]
                tmp = einsum('abcijk,jima->mkbc', Ptmp_t3Tv, eris_ooov)
                _add_pt2(pt2, nkpts, kconserv, [km, kk, kb],
                         [None, None, (b0, b1), (c0, c1)], -1. * tmp)

                # Contribution to Wovoo array
                km = kconserv[ka, ki, kc]
                eris_oovv = eris.oovv[km, ki, kc, :, :, c0:c1, a0:a1]
                tmp = einsum('abcijk,mica->mbkj', Ptmp_t3Tv, eris_oovv)
                Wmcik[km, kb, kk, :, b0:b1, :, :] += tmp

                # Contribution to Wvvoo array
                ke = kconserv[ki, ka, kk]
                eris_oovv = eris.oovv[ki, kk, ka, :, :, a0:a1, :]
                tmp = einsum('abcijk,ikae->cbej', Ptmp_t3Tv, eris_oovv)
                Wacek[kc, kb, ke, c0:c1, b0:b1, :, :] -= tmp

            logger.timer_debug1(
                mycc, 'EOM-CCSD T3[2] ka,kb,vir=(%d,%d,%d/%d) [total=%d]' %
                (ka, kb, task_id, len(tasks), nkpts**5), *cput2)

    # Divide pt1 by e(i) - e(a).  Entries outside the non-padded index sets
    # keep the LARGE_DENOM denominator.
    for ki in range(nkpts):
        ka = ki
        eia = LARGE_DENOM * np.ones(
            (nocc, nvir), dtype=eris.mo_energy[0].dtype)
        n0_ovp_ia = np.ix_(nonzero_opadding[ki], nonzero_vpadding[ka])
        eia[n0_ovp_ia] = (mo_e_o[ki][:, None] - mo_e_v[ka])[n0_ovp_ia]
        pt1[ki] /= eia

    # Divide pt2 by e(i) - e(a) + e(j) - e(b), with the same LARGE_DENOM
    # treatment; kb follows from kconserv[ki, ka, kj].
    for ki, ka in product(range(nkpts), repeat=2):
        eia = LARGE_DENOM * np.ones(
            (nocc, nvir), dtype=eris.mo_energy[0].dtype)
        n0_ovp_ia = np.ix_(nonzero_opadding[ki], nonzero_vpadding[ka])
        eia[n0_ovp_ia] = (mo_e_o[ki][:, None] - mo_e_v[ka])[n0_ovp_ia]
        for kj in range(nkpts):
            kb = kconserv[ki, ka, kj]
            ejb = LARGE_DENOM * np.ones(
                (nocc, nvir), dtype=eris.mo_energy[0].dtype)
            n0_ovp_jb = np.ix_(nonzero_opadding[kj], nonzero_vpadding[kb])
            ejb[n0_ovp_jb] = (mo_e_o[kj][:, None] - mo_e_v[kb])[n0_ovp_jb]
            eijab = eia[:, None, :, None] + ejb[:, None, :]
            pt2[ki, kj, ka] /= eijab

    # The corrections are added on top of the input amplitudes.
    pt1 += t1
    pt2 += t2

    logger.timer(mycc, 'EOM-CCSD(T) imds', *cpu0)

    delta_ccsd_energy = mycc.energy(pt1, pt2, eris) - ccsd_energy
    logger.info(mycc, 'CCSD energy T3[2] correction : %16.12e',
                delta_ccsd_energy)

    return delta_ccsd_energy, pt1, pt2, Wmcik, Wacek
Ejemplo n.º 29
0
def get_k_kpts(mydf,
               dm_kpts,
               hermi=1,
               kpts=np.zeros((1, 3)),
               kpts_band=None,
               exxdiv=None):
    '''Get the exchange (K) AO matrices at sampled k-points.

    Args:
        dm_kpts : (nkpts, nao, nao) ndarray
            Density matrix at each k-point.  If the object carries
            ``mo_coeff`` / ``mo_occ`` attributes and holds a single set of
            density matrices, the occupied orbitals are used in place of
            the density matrices.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        hermi : int
            Whether K matrix is hermitian (not read in this function)

            | 0 : not hermitian and not symmetric
            | 1 : hermitian

        kpts_band : (3,) ndarray or (*,3) ndarray
            A list of arbitrary "band" k-points at which to evaluate the matrix.
        exxdiv : str or None
            With 'ewald' (or None) the Coulomb kernel is requested without
            a divergence treatment; for 'ewald' the G=0 term is added back
            at the end.  Any other value is forwarded to the Coulomb kernel.

    Returns:
        vk : (nkpts, nao, nao) ndarray
        or list of vk if the input dm_kpts is a list of DMs
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    # Quadrature weight per grid point, including the k-point average.
    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    # Only a pure gamma-point calculation keeps the dtype of the density
    # matrices; otherwise the result is complex.
    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords
    # AO values stored transposed, i.e. with the grid index last.
    ao2_kpts = [
        np.asarray(ao.T, order='C')
        for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)
    ]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [
            np.asarray(ao.T, order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)
        ]
    if mo_coeff is not None and nset == 1:
        # Replace the AOs on the "2" side by occupation-weighted occupied
        # orbitals.  ao2_kpts is rebound to a new list, so ao1_kpts keeps
        # pointing at the plain AO values.
        mo_coeff = [
            mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
            for k, occ in enumerate(mo_occ)
        ]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # max_memory is already net of the memory in use; the original formula
    # subtracted mem_now a second time and so under-sized the block.
    blksize = int(
        min(nao, max(1, max_memory * 1e6 / 16 / 4 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s  blksize %d',
                  max_memory, blksize)
    #ao1_dtype = np.result_type(*ao1_kpts)
    #ao2_dtype = np.result_type(*ao2_kpts)
    vR_dm = np.empty((nset, nao, ngrids), dtype=vk_kpts.dtype)

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        # Nothing to contract for this k-point (e.g. no occupied orbitals).
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, exxdiv, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process the first AO index in blocks to bound the size of the
            # pair-density array.
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr,
                                 ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T)
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1)

    # Function _ewald_exxdiv_for_G0 to add back in the G=0 component to vk_kpts
    # Note in the _ewald_exxdiv_for_G0 implementation, the G=0 treatments are
    # different for 1D/2D and 3D systems.  The special treatments for 1D and 2D
    # can only be used with AFTDF/GDF/MDF method.  In the FFTDF method, 1D, 2D
    # and 3D should use the ewald probe charge correction.
    if exxdiv == 'ewald':
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
Ejemplo n.º 30
0
    def add_wvvVV_(self, t1, t2, eris, t2new_tril, with_ovvv=False):
        """Accumulate the tau * (vv|vv) contraction into ``t2new_tril``.

        The four-virtual integrals are assembled block by block from the
        three-index array ``eris.vvL`` (products of packed virtual-pair
        rows), so the full vvvv tensor is never held at once.

        Args:
            t1: singles amplitudes; ``nocc, nvir`` are read from its shape.
            t2: doubles amplitudes.
            eris: integral container; ``eris.naux`` and ``eris.vvL`` are
                read.
            t2new_tril: output array with ``nocc*(nocc+1)//2`` leading
                entries, updated in place.
            with_ovvv: accepted for interface compatibility; it is not read
                in this method.

        Returns:
            ``t2new_tril`` (the same object that was passed in).
        """
        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        assert (not self.direct)
        # time.clock() was removed in Python 3.8.  Prefer the logger's own
        # clocks when it exposes them, so the start marks share a time base
        # with logger.timer*; otherwise fall back to the legacy clock pair.
        cpu_clock = (getattr(logger, 'process_clock', None)
                     or getattr(time, 'clock', None) or time.process_time)
        wall_clock = getattr(logger, 'perf_counter', None) or time.time
        time0 = cpu_clock(), wall_clock()

        def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
            # Off-diagonal (rectangular) integral block [i0:i1, j0:j1]: it is
            # applied twice, once as stored and once transposed.
            nao = tau.shape[-1]
            ic = i1 - i0
            jc = j1 - j0
            #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
            _dgemm('N', 'N',
                   nocc * (nocc + 1) // 2, jc * nao, ic * nao,
                   tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
                   t2new_tril.reshape(-1, nao * nao), 1, 1, i0 * nao, 0,
                   j0 * nao)

            #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
            _dgemm('N', 'T',
                   nocc * (nocc + 1) // 2, ic * nao, jc * nao,
                   tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
                   t2new_tril.reshape(-1, nao * nao), 1, 1, j0 * nao, 0,
                   i0 * nao)

        def contract_tril_(t2new_tril, tau, eri, a0, a):
            # One row ``a`` of a lower-triangular integral block that starts
            # at index ``a0``.
            nvir = tau.shape[-1]
            #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
            _dgemm('N', 'N',
                   nocc * (nocc + 1) // 2, nvir, (a + 1 - a0) * nvir,
                   tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
                   t2new_tril.reshape(-1, nvir * nvir), 1, 1, a0 * nvir, 0,
                   a * nvir)

            #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
            if a > a0:
                _dgemm('N', 'T',
                       nocc * (nocc + 1) // 2, (a - a0) * nvir, nvir,
                       tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
                       t2new_tril.reshape(-1, nvir * nvir), 1, 1, a * nvir, 0,
                       a0 * nvir)

        # nocc is read by the closures above at call time, so defining it
        # after them is fine.
        nocc, nvir = t1.shape
        nvir_pair = nvir * (nvir + 1) // 2
        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        naux = eris.naux
        # tau packed over occupied pairs j <= i.
        tau = numpy.empty((nocc * (nocc + 1) // 2, nvir, nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0 + i + 1] = numpy.einsum('a,jb->jab', t1[i], t1[:i + 1])
            tau[p0:p0 + i + 1] += t2[i, :i + 1]
            p0 += i + 1
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        #TODO: check if vvL can be entirely load into memory
        max_memory = max(2000, self.max_memory - lib.current_memory()[0])
        # dmax: edge of a virtual-index block; vvblk: number of packed
        # virtual-pair rows of vvL loaded per chunk.  Both are derived from
        # the memory budget (in MB) and floored at 4.
        dmax = max(4, numpy.sqrt(max_memory * .9e6 / 8 / nvir**2 / 2))
        vvblk = max(4, (max_memory * 1e6 / 8 - dmax**2 *
                        (nvir**2 * 1.5 + naux)) / naux)
        dmax = int(dmax)
        vvblk = int(vvblk)
        eribuf = numpy.empty((dmax, dmax, nvir_pair))
        loadbuf = numpy.empty((dmax, dmax, nvir, nvir))

        for i0, i1 in lib.prange(0, nvir, dmax):
            di = i1 - i0
            # Off-diagonal blocks (j < i0).
            for j0, j1 in lib.prange(0, i0, dmax):
                dj = j1 - j0

                # Gather the vvL rows of the packed pairs (i, j0:j1).
                ijL = numpy.empty((di, dj, naux))
                for i in range(i0, i1):
                    ioff = i * (i + 1) // 2
                    ijL[i - i0] = eris.vvL[ioff + j0:ioff + j1]
                ijL = ijL.reshape(-1, naux)
                eri = numpy.ndarray(((i1 - i0) * (j1 - j0), nvir_pair),
                                    buffer=eribuf)
                for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                    vvL = numpy.asarray(eris.vvL[p0:p1])
                    eri[:, p0:p1] = lib.ddot(ijL, vvL.T)
                    vvL = None

                tmp = numpy.ndarray((i1 - i0, nvir, j1 - j0, nvir),
                                    buffer=loadbuf)
                _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                       eri.ctypes.data_as(ctypes.c_void_p),
                                       (ctypes.c_int * 4)(i0, i1, j0, j1),
                                       ctypes.c_int(nvir))
                contract_rec_(t2new_tril, tau, tmp, i0, i1, j0, j1)
                time0 = logger.timer_debug1(
                    self, 'vvvv [%d:%d,%d:%d]' % (i0, i1, j0, j1), *time0)

            # Diagonal block: only pairs (i, i0..i) are gathered, then the
            # block is handled row by row.
            ijL = []
            for i in range(i0, i1):
                ioff = i * (i + 1) // 2
                ijL.append(eris.vvL[ioff + i0:ioff + i + 1])
            ijL = numpy.vstack(ijL).reshape(-1, naux)
            eri = numpy.ndarray((di * (di + 1) // 2, nvir_pair), buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL = numpy.asarray(eris.vvL[p0:p1])
                eri[:, p0:p1] = lib.ddot(ijL, vvL.T)
                vvL = None
            for i in range(di):
                p0, p1 = i * (i + 1) // 2, (i + 1) * (i + 2) // 2
                tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                contract_tril_(t2new_tril, tau, tmp, i0, i0 + i)
            time0 = logger.timer_debug1(
                self, 'vvvv [%d:%d,%d:%d]' % (i0, i1, i0, i1), *time0)
        # Drop references to the large work buffers.
        eribuf = loadbuf = eri = tmp = None
        return t2new_tril
Ejemplo n.º 31
0
def get_jk_favork(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    '''Accumulate J and K matrices over blocks of integration grid points.

    For every block of grid points the AO values are evaluated, contracted
    with the density matrices, and combined with per-grid-point integrals
    obtained from ``_gen_batch_nuc`` (when ``sgx.debug`` is set) or from
    ``_gen_jk_direct`` (otherwise).  The accumulated matrices are finally
    multiplied by ``solve(sn, ovlp)``, where ``sn`` is the overlap matrix
    summed on the grid and ``ovlp`` is the ``int1e_ovlp`` overlap matrix.

    Args:
        sgx : object providing ``mol``, ``grids``, ``grids_thrd``, ``debug``,
            ``_opt``, ``max_memory`` and ``blockdim``
        dm : ndarray whose last two axes are (nao, nao)
            a single density matrix or a stack of density matrices

    Kwargs:
        hermi : if 1, the K matrices are symmetrized before being returned
        with_j : accumulate the J matrices; zeros are returned otherwise
        with_k : accumulate the K matrices; zeros are returned otherwise
        direct_scf_tol : forwarded to ``_gen_jk_direct``

    Returns:
        (vj, vk), each reshaped to the shape of ``dm``
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (so the values stay comparable with older loggers) and fall
    # back to time.process_time() on newer interpreters.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t0 = cpu_clock(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    # Flatten any leading axes so that the density matrices form one stack.
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)
    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)

    sn = numpy.zeros((nao,nao))
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)

    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    # Block size: at least 4 grid points, at most ngrids, otherwise limited
    # by sgx.blockdim and by the remaining memory (8 bytes * nao**2 per point).
    blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
    # (CPU, wall) time accumulated inside the integral generators only.
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1,None]
        sn += lib.dot(ao.T, wao)

        fg = lib.einsum('gi,xij->xgj', wao, dms)
        # Keep only the grid points where, for at least one density matrix,
        # some element of fg lies outside [-gthrd, gthrd].
        mask = numpy.zeros(i1-i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i]>gthrd, axis=1)
            mask |= numpy.any(fg[i]<-gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:,mask]
            coords = coords[mask]

        if sgx.debug:
            # Subtract the clock before and add it after the call so that
            # tnuc accumulates the elapsed time of batch_nuc.
            tnuc = tnuc[0] - cpu_clock(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + cpu_clock(), tnuc[1] + time.time()
            if with_j:
                jg = numpy.einsum('gij,xij->xg', gbn, dms)
            if with_k:
                gv = lib.einsum('gvt,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - cpu_clock(), tnuc[1] - time.time()
            jg, gv = batch_jk(mol, coords, dms, fg.copy())
            tnuc = tnuc[0] + cpu_clock(), tnuc[1] + time.time()

        if with_j:
            xj = lib.einsum('gv,xg->xgv', ao, jg)
            for i in range(nset):
                vj[i] += lib.einsum('gu,gv->uv', wao, xj[i])
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        jg = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    # Everything in the J/K builder that was not spent in the integral
    # generators is reported as tensor-contraction time.
    tdot = t2[0] - t1[0] - tnuc[0] , t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])

    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)

    if with_j:
        vj = lib.einsum('pi,xpj->xij', proj, vj)
        vj = (vj + vj.transpose(0,2,1))*.5
    if with_k:
        vk = lib.einsum('pi,xpj->xij', proj, vk)
        if hermi == 1:
            vk = (vk + vk.transpose(0,2,1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
Ejemplo n.º 32
0
def _make_eris_outcore(mycc, mo_coeff=None):
    '''Transform the integrals for ``mycc`` and store every block in an HDF5
    temporary file.

    Block names use lower-case ``o``/``v`` for the occupied/virtual orbitals
    of the first set in ``eris.mo_coeff`` and upper-case ``O``/``V`` for
    those of the second set.  Blocks whose name ends in two virtual letters
    (``ovvv``, ``OVVV``, ``ovVV``, ``OVvv``) keep that last virtual pair
    packed with ``lib.pack_tril``.  The ``vvvv``, ``VVVV`` and ``vvVV``
    blocks are only generated when ``mycc.direct`` is false.

    Args:
        mycc : object providing ``nocc``, ``nmo``, ``mol`` and ``direct``
        mo_coeff : forwarded to ``_ChemistsERIs._common_init_``

    Returns:
        the populated ``_ChemistsERIs`` object; its ``feri`` attribute is
        the temporary file that owns the datasets
    '''
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    nocca, noccb = mycc.nocc
    nmoa, nmob = mycc.nmo
    nvira, nvirb = nmoa - nocca, nmob - noccb

    moa = eris.mo_coeff[0]
    mob = eris.mo_coeff[1]
    nmoa = moa.shape[1]
    nmob = mob.shape[1]

    orboa = moa[:, :nocca]
    orbob = mob[:, :noccb]
    orbva = moa[:, nocca:]
    orbvb = mob[:, noccb:]
    eris.feri = lib.H5TmpFile()

    def new_dset(name, *shape):
        # All integral blocks are stored as float64 datasets in eris.feri.
        return eris.feri.create_dataset(name, shape, 'f8')

    # Number of elements in a packed lower triangle of a virtual pair.
    nvira_pair = nvira * (nvira + 1) // 2
    nvirb_pair = nvirb * (nvirb + 1) // 2

    eris.oooo = new_dset('oooo', nocca, nocca, nocca, nocca)
    eris.ovoo = new_dset('ovoo', nocca, nvira, nocca, nocca)
    eris.ovov = new_dset('ovov', nocca, nvira, nocca, nvira)
    eris.oovv = new_dset('oovv', nocca, nocca, nvira, nvira)
    eris.ovvo = new_dset('ovvo', nocca, nvira, nvira, nocca)
    eris.ovvv = new_dset('ovvv', nocca, nvira, nvira_pair)

    eris.OOOO = new_dset('OOOO', noccb, noccb, noccb, noccb)
    eris.OVOO = new_dset('OVOO', noccb, nvirb, noccb, noccb)
    eris.OVOV = new_dset('OVOV', noccb, nvirb, noccb, nvirb)
    eris.OOVV = new_dset('OOVV', noccb, noccb, nvirb, nvirb)
    eris.OVVO = new_dset('OVVO', noccb, nvirb, nvirb, noccb)
    eris.OVVV = new_dset('OVVV', noccb, nvirb, nvirb_pair)

    eris.ooOO = new_dset('ooOO', nocca, nocca, noccb, noccb)
    eris.ovOO = new_dset('ovOO', nocca, nvira, noccb, noccb)
    eris.ovOV = new_dset('ovOV', nocca, nvira, noccb, nvirb)
    eris.ooVV = new_dset('ooVV', nocca, nocca, nvirb, nvirb)
    eris.ovVO = new_dset('ovVO', nocca, nvira, nvirb, noccb)
    eris.ovVV = new_dset('ovVV', nocca, nvira, nvirb_pair)

    eris.OVoo = new_dset('OVoo', noccb, nvirb, nocca, nocca)
    eris.OOvv = new_dset('OOvv', noccb, noccb, nvira, nvira)
    eris.OVvo = new_dset('OVvo', noccb, nvirb, nvira, nocca)
    eris.OVvv = new_dset('OVvv', noccb, nvirb, nvira_pair)

    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists and fall back to time.process_time() on newer interpreters.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    cput1 = cpu_clock(), time.time()
    mol = mycc.mol
    # <ij||pq> = <ij|pq> - <ij|qp> = (ip|jq) - (iq|jp)
    # Each transformation is written to a scratch file, sliced into the
    # occupied/virtual blocks one occupied index at a time, then deleted.
    tmpf = lib.H5TmpFile()
    if nocca > 0:
        ao2mo.general(mol, (orboa, moa, moa, moa), tmpf, 'aa')
        buf = np.empty((nmoa, nmoa, nmoa))
        for i in range(nocca):
            lib.unpack_tril(tmpf['aa'][i * nmoa:(i + 1) * nmoa], out=buf)
            eris.oooo[i] = buf[:nocca, :nocca, :nocca]
            eris.ovoo[i] = buf[nocca:, :nocca, :nocca]
            eris.ovov[i] = buf[nocca:, :nocca, nocca:]
            eris.oovv[i] = buf[:nocca, nocca:, nocca:]
            eris.ovvo[i] = buf[nocca:, nocca:, :nocca]
            eris.ovvv[i] = lib.pack_tril(buf[nocca:, nocca:, nocca:])
        del tmpf['aa']

    if noccb > 0:
        buf = np.empty((nmob, nmob, nmob))
        ao2mo.general(mol, (orbob, mob, mob, mob), tmpf, 'bb')
        for i in range(noccb):
            lib.unpack_tril(tmpf['bb'][i * nmob:(i + 1) * nmob], out=buf)
            eris.OOOO[i] = buf[:noccb, :noccb, :noccb]
            eris.OVOO[i] = buf[noccb:, :noccb, :noccb]
            eris.OVOV[i] = buf[noccb:, :noccb, noccb:]
            eris.OOVV[i] = buf[:noccb, noccb:, noccb:]
            eris.OVVO[i] = buf[noccb:, noccb:, :noccb]
            eris.OVVV[i] = lib.pack_tril(buf[noccb:, noccb:, noccb:])
        del tmpf['bb']

    if nocca > 0:
        buf = np.empty((nmoa, nmob, nmob))
        ao2mo.general(mol, (orboa, moa, mob, mob), tmpf, 'ab')
        for i in range(nocca):
            lib.unpack_tril(tmpf['ab'][i * nmoa:(i + 1) * nmoa], out=buf)
            eris.ooOO[i] = buf[:nocca, :noccb, :noccb]
            eris.ovOO[i] = buf[nocca:, :noccb, :noccb]
            eris.ovOV[i] = buf[nocca:, :noccb, noccb:]
            eris.ooVV[i] = buf[:nocca, noccb:, noccb:]
            eris.ovVO[i] = buf[nocca:, noccb:, :noccb]
            eris.ovVV[i] = lib.pack_tril(buf[nocca:, noccb:, noccb:])
        del tmpf['ab']

    if noccb > 0:
        buf = np.empty((nmob, nmoa, nmoa))
        ao2mo.general(mol, (orbob, mob, moa, moa), tmpf, 'ba')
        for i in range(noccb):
            lib.unpack_tril(tmpf['ba'][i * nmob:(i + 1) * nmob], out=buf)
            eris.OVoo[i] = buf[noccb:, :nocca, :nocca]
            eris.OOvv[i] = buf[:noccb, nocca:, nocca:]
            eris.OVvo[i] = buf[noccb:, nocca:, :nocca]
            eris.OVvv[i] = lib.pack_tril(buf[noccb:, nocca:, nocca:])
        del tmpf['ba']
    buf = None
    cput1 = logger.timer_debug1(mycc, 'transforming oopq, ovpq', *cput1)

    if not mycc.direct:
        # The all-virtual blocks are written straight into eris.feri.
        ao2mo.full(mol, orbva, eris.feri, dataname='vvvv')
        ao2mo.full(mol, orbvb, eris.feri, dataname='VVVV')
        ao2mo.general(mol, (orbva, orbva, orbvb, orbvb),
                      eris.feri,
                      dataname='vvVV')
        eris.vvvv = eris.feri['vvvv']
        eris.VVVV = eris.feri['VVVV']
        eris.vvVV = eris.feri['vvVV']
        cput1 = logger.timer_debug1(mycc, 'transforming vvvv', *cput1)

    return eris
Ejemplo n.º 33
0
def get_jk(mf_grad,
           mol=None,
           dm=None,
           hermi=0,
           with_j=True,
           with_k=True,
           ishf=True):
    '''Density-fitted nuclear-gradient contributions of the J and K matrices.

    Args:
        mf_grad : gradient object providing ``mol``, ``base`` (with
            ``with_df``, ``make_rdm1``, ``mo_coeff`` and ``mo_occ``),
            ``max_memory`` and ``auxbasis_response``

    Kwargs:
        mol : defaults to ``mf_grad.mol``
        dm : ndarray whose last two axes are (nao, nao); defaults to
            ``mf_grad.base.make_rdm1()``.  If it carries ``mo_coeff`` and
            ``mo_occ`` attributes, those are used to factorize it.
        hermi : accepted but not used in this function body
        with_j : accepted but not used in this function body
        with_k : if False, only ``vj`` is computed and ``(vj, None)`` is
            returned
        ishf : if True, orbitals and occupations are taken from
            ``mf_grad.base`` (unless ``dm`` carries them) and the auxiliary
            terms are summed over density-matrix pairs; if False, each
            density matrix is diagonalized in the ``int1e_ovlp`` metric and
            the auxiliary terms are kept per pair

    Returns:
        (vj, vk), each with shape ``dm.shape[:-2] + (3,) + dm.shape[-2:]``.
        When ``mf_grad.auxbasis_response`` is true, each is tagged with an
        ``aux`` attribute holding the auxiliary-basis contribution.
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists and fall back to time.process_time() on newer interpreters.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t0 = (cpu_clock(), time.time())
    if mol is None: mol = mf_grad.mol
    if dm is None: dm = mf_grad.base.make_rdm1()

    with_df = mf_grad.base.with_df
    auxmol = with_df.auxmol
    if auxmol is None:
        auxmol = df.addons.make_auxmol(with_df.mol, with_df.auxbasis)
    ao_loc = mol.ao_loc
    nbas = mol.nbas
    nauxbas = auxmol.nbas

    get_int3c_s1 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's1')
    get_int3c_s2 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    get_int3c_ip1 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip1', 's1')
    get_int3c_ip2 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')

    nao = mol.nao
    naux = auxmol.nao
    dms = numpy.asarray(dm)
    out_shape = dms.shape[:-2] + (3, ) + dms.shape[-2:]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    # Lower-triangular packed (dm + dm^T) with the diagonal halved, so that
    # a contraction with s2-packed integrals counts every AO pair once.
    idx = numpy.arange(nao)
    idx = idx * (idx + 1) // 2 + idx
    dm_tril = dms + dms.transpose(0, 2, 1)
    dm_tril = lib.pack_tril(dm_tril)
    dm_tril[:, idx] *= .5

    auxslices = auxmol.aoslice_by_atom()
    aux_loc = auxmol.ao_loc
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = int(min(max(max_memory * .5e6 / 8 / (nao**2 * 3), 20), naux,
                      240))
    ao_ranges = balance_partition(aux_loc, blksize)

    if not with_k:

        # (i,j|P)
        rhoj = numpy.empty((nset, naux))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_s2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            rhoj[:, p0:p1] = lib.einsum('wp,nw->np', int3c, dm_tril)
            int3c = None

        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1')
        # assume_a='pos' is the supported spelling of the former
        # sym_pos=True keyword.
        rhoj = scipy.linalg.solve(int2c, rhoj.T, assume_a='pos').T
        int2c = None

        # (d/dX i,j|P)
        vj = numpy.zeros((nset, 3, nao, nao))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            vj += lib.einsum('xijp,np->nxij', int3c, rhoj[:, p0:p1])
            int3c = None

        if mf_grad.auxbasis_response:
            # (i,j|d/dX P)
            vjaux = numpy.empty((nset, nset, 3, naux))
            for shl0, shl1, nL in ao_ranges:
                int3c = get_int3c_ip2(
                    (0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
                p0, p1 = aux_loc[shl0], aux_loc[shl1]
                vjaux[:, :, :, p0:p1] = lib.einsum('xwp,mw,np->mnxp', int3c,
                                                   dm_tril, rhoj[:, p0:p1])
                int3c = None

            # (d/dX P|Q)
            int2c_e1 = auxmol.intor('int2c2e_ip1', aosym='s1')
            vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)

            # Sum the auxiliary-function axis over each slice returned by
            # auxmol.aoslice_by_atom().
            vjaux = numpy.array([
                -vjaux[:, :, :, p0:p1].sum(axis=3)
                for p0, p1 in auxslices[:, 2:]
            ])
            if ishf:
                vjaux = vjaux.sum((1, 2))
            else:
                vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        else:
            vj = -vj.reshape(out_shape)
        logger.timer(mf_grad, 'df vj', *t0)
        return vj, None

    if hasattr(dm, 'mo_coeff') and hasattr(dm, 'mo_occ'):
        mo_coeff = dm.mo_coeff
        mo_occ = dm.mo_occ
    elif ishf:
        mo_coeff = mf_grad.base.mo_coeff
        mo_occ = mf_grad.base.mo_occ
        if isinstance(mf_grad.base, scf.rohf.ROHF):
            # Duplicate the single ROHF orbital set and split the occupations
            # into (occ > 0) and (occ == 2) sets.
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
            assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))
    else:
        # Factorize every density matrix by solving the generalized
        # eigenproblem (S dm S) c = n S c.
        s0 = mol.intor('int1e_ovlp')
        mo_occ = []
        mo_coeff = []
        for dm_i in dms:
            sdms = reduce(lib.dot, (s0, dm_i, s0))
            n, c = scipy.linalg.eigh(sdms, b=s0)
            mo_occ.append(n)
            mo_coeff.append(c)
        mo_occ = numpy.stack(mo_occ, axis=0)
    nmo = mo_occ.shape[-1]

    mo_coeff = numpy.asarray(mo_coeff).reshape(-1, nao, nmo)
    mo_occ = numpy.asarray(mo_occ).reshape(-1, nmo)
    rhoj = numpy.zeros((nset, naux))
    f_rhok = lib.H5TmpFile()
    # orbol[i]: orbitals of set i with |occupation| > 1e-8;
    # orbor[i]: the same orbitals scaled by their occupations.
    # Both are slices of Fortran-ordered stacks holding all sets side by side.
    orbor = []
    orbol = []
    nocc = []
    orbor_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    orbol_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    offs = 0
    for i in range(nset):
        idx = numpy.abs(mo_occ[i]) > 1e-8
        nocc.append(numpy.count_nonzero(idx))
        c = mo_coeff[i][:, idx]
        orbol_stack = numpy.append(orbol_stack, c, axis=1)
        orbol.append(orbol_stack[:, offs:offs + nocc[-1]])
        cn = lib.einsum('pi,i->pi', c, mo_occ[i][idx])
        orbor_stack = numpy.append(orbor_stack, cn, axis=1)
        orbor.append(orbor_stack[:, offs:offs + nocc[-1]])
        offs += nocc[-1]

    # (P|Q)
    int2c = scipy.linalg.cho_factor(auxmol.intor('int2c2e', aosym='s1'))

    t1 = (cpu_clock(), time.time())
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = max_memory * .5e6 / 8 / (naux * nao)
    mol_ao_ranges = balance_partition(ao_loc, blksize)
    nsteps = len(mol_ao_ranges)
    t2 = t1
    for istep, (shl0, shl1, nd) in enumerate(mol_ao_ranges):
        int3c = get_int3c_s1((0, nbas, shl0, shl1, 0, nauxbas))
        t2 = logger.timer_debug1(mf_grad, 'df grad intor (P|mn)', *t2)
        p0, p1 = ao_loc[shl0], ao_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            v = lib.dot(int3c.reshape(nao, -1, order='F').T,
                        orbor[i]).reshape(naux, (p1 - p0) * nocc[i])
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad einsum (P|mn) u_ni N_i = v_Pmi',
                                     *t2)
            rhoj[i] += numpy.dot(v, orbol[i][p0:p1].ravel())
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad einsum v_Pmi u_mi = rho_P', *t2)
            v = scipy.linalg.cho_solve(int2c, v)
            t2 = logger.timer_debug1(mf_grad,
                                     'df grad cho_solve (P|Q) D_Qmi = v_Pmi',
                                     *t2)
            # Cached on disk per (set, AO block); reassembled by load().
            f_rhok['%s/%s' % (i, istep)] = v.reshape(naux, p1 - p0, -1)
            t2 = logger.timer_debug1(
                mf_grad,
                'df grad cache D_Pmi (m <-> i transpose upon retrieval)', *t2)
        int3c = v = None

    rhoj = scipy.linalg.cho_solve(int2c, rhoj.T).T
    int2c = None
    t1 = logger.timer_debug1(
        mf_grad, 'df grad vj and vk AO (P|Q) D_Q = (P|mn) D_mn solve', *t1)

    def load(set_id, p0, p1):
        # Read auxiliary rows p0:p1 of the cached tensor for one set from all
        # AO blocks and return them as (p1 - p0, nocc[set_id], nao).
        buf = numpy.empty((p1 - p0, nocc[set_id], nao))
        col1 = 0
        for istep in range(nsteps):
            dat = f_rhok['%s/%s' % (set_id, istep)][p0:p1]
            col0, col1 = col1, col1 + dat.shape[1]
            buf[:p1 - p0, :, col0:col1] = dat.transpose(0, 2, 1)
        return buf

    vj = numpy.zeros((nset, 3, nao, nao))
    vk = numpy.zeros((nset, 3, nao, nao))
    # (d/dX i,j|P)
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s1  # MO output index slower than AO output index; input AOs are asymmetric
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s1  # input is not tril_packed
    null = lib.c_null_ptr()
    t2 = t1
    for shl0, shl1, nL in ao_ranges:
        int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0,
                               shl1)).transpose(0, 3, 2,
                                                1)  # (P|mn'), row-major order
        t2 = logger.timer_debug1(mf_grad, "df grad intor (P|mn')", *t2)
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            for comp in range(3):
                vj[i, comp] += numpy.dot(
                    rhoj[i, p0:p1],
                    int3c[comp].reshape(p1 - p0, -1)).reshape(nao, nao).T
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum rho_P (P|mn') rho_P", *t2)
            tmp = numpy.empty((3, p1 - p0, nocc[i], nao),
                              dtype=orbol_stack.dtype)
            fdrv(
                ftrans,
                fmmm,  # xPmn u_mi -> xPin
                tmp.ctypes.data_as(ctypes.c_void_p),
                int3c.ctypes.data_as(ctypes.c_void_p),
                orbol[i].ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(3 * (p1 - p0)),
                ctypes.c_int(nao),
                (ctypes.c_int * 4)(0, nocc[i], 0, nao),
                null,
                ctypes.c_int(0))
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum (P|mn') u_mi = dg_Pin",
                                     *t2)
            rhok = load(i, p0, p1)
            vk[i] += lib.einsum('xpoi,pok->xik', tmp, rhok)
            t2 = logger.timer_debug1(mf_grad,
                                     "df grad einsum D_Pim dg_Pin = v_ij", *t2)
            rhok = tmp = None
        int3c = None
    t1 = logger.timer_debug1(mf_grad, 'df grad vj and vk AO (P|mn) D_P eval',
                             *t1)

    if mf_grad.auxbasis_response:
        # Cache (P|uv) D_ui c_vj. Must include both upper and lower triangles
        # over nset.
        max_memory = mf_grad.max_memory - lib.current_memory()[0]
        blksize = int(
            min(max(max_memory * .5e6 / 8 / (nao * max(nocc)), 20), naux))
        # rhok_oo[i * nset + j] has shape (naux, nocc[i], nocc[j]).
        rhok_oo = []
        for i, j in product(range(nset), repeat=2):
            tmp = numpy.empty((naux, nocc[i], nocc[j]))
            for p0, p1 in lib.prange(0, naux, blksize):
                rhok = load(i, p0, p1).reshape((p1 - p0) * nocc[i], nao)
                tmp[p0:p1] = lib.dot(rhok,
                                     orbol[j]).reshape(p1 - p0, nocc[i],
                                                       nocc[j])
            rhok_oo.append(tmp)
            rhok = tmp = None
        t1 = logger.timer_debug1(
            mf_grad, 'df grad vj and vk aux d_Pim u_mj = d_Pij eval', *t1)

        vjaux = numpy.zeros((nset, nset, 3, naux))
        vkaux = numpy.zeros((nset, nset, 3, naux))
        # (i,j|d/dX P)
        t2 = t1
        fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2  # MO output index slower than AO output index; input AOs are symmetric
        fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
        ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2  # input is tril_packed
        null = lib.c_null_ptr()
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            t2 = logger.timer_debug1(mf_grad, "df grad intor (P'|mn)", *t2)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            drhoj = lib.dot(
                int3c.transpose(0, 2, 1).reshape(3 * (p1 - p0), -1),
                dm_tril.T).reshape(3, p1 - p0, -1)  # xpij,mij->xpm
            vjaux[:, :, :, p0:p1] = lib.einsum('xpm,np->mnxp', drhoj,
                                               rhoj[:, p0:p1])
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum rho_P (P'|mn) D_mn = v_P", *t2)
            tmp = [
                numpy.empty((3, p1 - p0, nocc_i, nao), dtype=orbor_stack.dtype)
                for nocc_i in nocc
            ]
            assert (orbor_stack.flags.f_contiguous), '{} {}'.format(
                orbor_stack.shape, orbor_stack.strides)
            for orb, buf, nocc_i in zip(orbol, tmp, nocc):
                fdrv(
                    ftrans,
                    fmmm,  # gPmn u_ni -> gPim
                    buf.ctypes.data_as(ctypes.c_void_p),
                    int3c.ctypes.data_as(ctypes.c_void_p),
                    orb.ctypes.data_as(ctypes.c_void_p),
                    ctypes.c_int(3 * (p1 - p0)),
                    ctypes.c_int(nao),
                    (ctypes.c_int * 4)(0, nocc_i, 0, nao),
                    null,
                    ctypes.c_int(0))
            # From here on int3c is a nested list indexed [i][j].
            int3c = [[
                lib.dot(buf.reshape(-1, nao),
                        orb).reshape(3, p1 - p0, -1, norb)
                for orb, norb in zip(orbor, nocc)
            ] for buf in tmp]  # pim,mj,j -> pij
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum (P'|mn) u_mi u_nj N_j = v_Pmn", *t2)
            for i, j in product(range(nset), repeat=2):
                k = (i * nset) + j
                tmp = rhok_oo[k][p0:p1]
                vkaux[i, j, :, p0:p1] += lib.einsum('xpij,pij->xp',
                                                    int3c[i][j], tmp)
                t2 = logger.timer_debug1(mf_grad,
                                         "df grad einsum d_Pij v_Pij = v_P",
                                         *t2)
        int3c = tmp = None
        t1 = logger.timer_debug1(mf_grad, "df grad vj and vk aux (P'|mn) eval",
                                 *t1)

        # (d/dX P|Q)
        int2c_e1 = auxmol.intor('int2c2e_ip1')
        vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)
        for i, j in product(range(nset), repeat=2):
            k = (i * nset) + j
            l = (j * nset) + i
            tmp = lib.einsum('pij,qji->pq', rhok_oo[k], rhok_oo[l])
            vkaux[i, j] -= lib.einsum('xpq,pq->xp', int2c_e1, tmp)
        t1 = logger.timer_debug1(mf_grad, "df grad vj and vk aux (P'|Q) eval",
                                 *t1)

        # Sum the auxiliary-function axis over each slice returned by
        # auxmol.aoslice_by_atom(); the slice axis becomes the leading axis.
        vjaux = numpy.array([
            -vjaux[:, :, :, p0:p1].sum(axis=3) for p0, p1 in auxslices[:, 2:]
        ])
        vkaux = numpy.array([
            -vkaux[:, :, :, p0:p1].sum(axis=3) for p0, p1 in auxslices[:, 2:]
        ])
        if ishf:
            vjaux = vjaux.sum((1, 2))
            # Flat indices i * nset + i, i.e. the i == j pairs.
            idx = numpy.array(list(range(nset))) * (nset + 1)
            # NOTE(review): vkaux was just stacked with the auxslices axis
            # first, i.e. (len(auxslices), nset, nset, 3); this reshape
            # reinterprets that buffer as (nset**2, 3, natm) without a
            # transpose -- confirm the resulting axis order is intended.
            vkaux = vkaux.reshape((nset**2, 3, mol.natm))[idx, :, :].sum(0)
        else:
            vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vkaux = numpy.ascontiguousarray(vkaux.transpose(1, 2, 0, 3))
        vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        vk = lib.tag_array(-vk.reshape(out_shape), aux=numpy.array(vkaux))
    else:
        vj = -vj.reshape(out_shape)
        vk = -vk.reshape(out_shape)
    logger.timer(mf_grad, 'df grad vj and vk', *t0)
    return vj, vk
Ejemplo n.º 34
0
    def build(self, omega=None, direct_scf_tol=None):
        '''Set up the intermediates stored on this object: the
        range-separation parameter and mesh, the re-contracted cell, the
        BvK cell, the extended molecule with its ``VHFOpt`` screening
        object, the overlap mask and the long-range AFT helper.

        Kwargs:
            omega : range-separation parameter.  If given it replaces
                ``self.omega``; if both are None, ``_guess_omega`` chooses
                omega, mesh and ke_cutoff together.
            direct_scf_tol : integral screening threshold.  Defaults to
                ``cell.precision**1.5``.

        Returns:
            self

        Raises:
            NotImplementedError : if twice the sum of the scaled k-points
                deviates from integers by more than 1e-5
        '''
        # time.clock() was removed in Python 3.8.  Keep using it where it
        # still exists and fall back to time.process_time() otherwise.
        cpu_clock = getattr(time, 'clock', None) or time.process_time
        cpu0 = (cpu_clock(), time.time())
        cell = self.cell
        kpts = self.kpts

        # Twice the summed scaled k-points must be integers (within 1e-5).
        k_scaled = cell.get_scaled_kpts(kpts).sum(axis=0)
        k_mod_to_half = k_scaled * 2 - (k_scaled * 2).round(0)
        if abs(k_mod_to_half).sum() > 1e-5:
            raise NotImplementedError('k-points must be symmetryic')

        if omega is not None:
            self.omega = omega

        if self.omega is None:
            # Search a proper range-separation parameter omega that can balance the
            # computational cost between the real space integrals and moment space
            # integrals
            self.omega, self.mesh, self.ke_cutoff = _guess_omega(
                cell, kpts, self.mesh)
        else:
            self.ke_cutoff = aft.estimate_ke_cutoff_for_omega(cell, self.omega)
            self.mesh = pbctools.cutoff_to_mesh(cell.lattice_vectors(),
                                                self.ke_cutoff)

        logger.info(self, 'omega = %.15g  ke_cutoff = %s  mesh = %s',
                    self.omega, self.ke_cutoff, self.mesh)

        if direct_scf_tol is None:
            direct_scf_tol = cell.precision**1.5
            logger.debug(self, 'Set direct_scf_tol %g', direct_scf_tol)

        self.cell_rs = cell_rs = _re_contract_cell(cell, self.ke_cutoff)
        self.bvk_kmesh = kmesh = k2gamma.kpts_to_kmesh(cell_rs, kpts)
        bvkcell, phase = k2gamma.get_phase(cell_rs, kpts, kmesh)
        self.bvkmesh_Ls = Ks = k2gamma.translation_vectors_for_kmesh(
            cell_rs, kmesh)
        self.bvkcell = bvkcell
        self.phase = phase

        # Given ke_cutoff, eta corresponds to the most steep Gaussian basis
        # of which the Coulomb integrals can be accurately computed in moment
        # space.
        eta = aft.estimate_eta_for_ke_cutoff(cell,
                                             self.ke_cutoff,
                                             precision=cell.precision)
        # * Assuming the most steep function in smooth basis has exponent eta,
        # with attenuation parameter omega, rcut_sr is the distance of which
        # the value of attenuated Coulomb integrals of four shells |eta> is
        # smaller than the required precision.
        # * The attenuated coulomb integrals between four s-type Gaussians
        # (2*a/pi)^{3/4}exp(-a*r^2) is
        #   (erfc(omega*a^0.5/(omega^2+a)^0.5*R) - erfc(a^0.5*R)) / R
        # if two Gaussians on one center and the other two on another center
        # and the distance between the two centers are R.
        # * The attenuated coulomb integrals between two spherical charge
        # distributions is
        #   ~(pi/eta)^3/2 (erfc(tau*(eta/2)^0.5*R) - erfc((eta/2)^0.5*R)) / R
        #       tau = omega/sqrt(omega^2 + eta/2)
        # if the spherical charge distribution is the product of above s-type
        # Gaussian with exponent eta and a very smooth function.
        # When R is large, the attenuated Coulomb integral is
        #   ~= (pi/eta)^3/2 erfc(tau*(eta/2)^0.5*R) / R
        #   ~= pi/(tau*eta^2*R^2) exp(-tau^2*eta*R^2/2)
        tau = self.omega / (self.omega**2 + eta / 2)**.5
        # A single refinement step of the estimate above, starting from 10.
        rcut_sr = 10  # initial guess
        rcut_sr = (-np.log(direct_scf_tol * tau * (eta * rcut_sr)**2 / np.pi) /
                   (tau**2 * eta / 2))**.5
        logger.debug(self, 'eta = %g  rcut_sr = %g', eta, rcut_sr)

        # Ls is the translation vectors to mimic periodicity of a cell
        Ls = bvkcell.get_lattice_Ls(rcut=cell.rcut + rcut_sr)
        # Sorted by distance from the origin.
        self.supmol_Ls = Ls = Ls[np.linalg.norm(Ls, axis=1).argsort()]

        supmol = _make_extended_mole(cell_rs, Ls, Ks, self.omega,
                                     direct_scf_tol)
        self.supmol = supmol

        nkpts = len(self.bvkmesh_Ls)
        nbas = cell_rs.nbas
        n_steep, n_local, n_diffused = cell_rs._nbas_each_set
        n_compact = n_steep + n_local
        bas_mask = supmol._bas_mask

        self.bvk_bas_mask = bvk_bas_mask = bas_mask.any(axis=2)
        # Some basis in bvk-cell are not presented in the supmol. They can be
        # skipped when computing SR integrals
        self.bvkcell._bas = bvkcell._bas[bvk_bas_mask.ravel()]

        # Record the mapping between the dense bvkcell basis and the
        # original sparse bvkcell basis
        bvk_cell_idx = np.repeat(np.arange(nkpts)[:, None], nbas, axis=1)
        self.bvk_cell_id = bvk_cell_idx[bvk_bas_mask].astype(np.int32)
        cell0_shl_idx = np.repeat(np.arange(nbas)[None, :], nkpts, axis=0)
        self.cell0_shl_id = cell0_shl_idx[bvk_bas_mask].astype(np.int32)

        logger.timer_debug1(self, 'initializing supmol', *cpu0)
        logger.info(self, 'sup-mol nbas = %d cGTO = %d pGTO = %d', supmol.nbas,
                    supmol.nao, supmol.npgto_nr())

        supmol.omega = -self.omega  # Set short range coulomb
        with supmol.with_integral_screen(direct_scf_tol**2):
            vhfopt = _vhf.VHFOpt(supmol,
                                 'int2e_sph',
                                 qcondname=libpbc.PBCVHFsetnr_direct_scf)
        vhfopt.direct_scf_tol = direct_scf_tol
        self.vhfopt = vhfopt
        logger.timer(self, 'initializing vhfopt', *cpu0)

        # Condense the shell-pair screening matrix of supmol over the blocks
        # delimited by supmol._images_loc, keeping the max absolute value.
        q_cond = vhfopt.get_q_cond((supmol.nbas, supmol.nbas))
        idx = supmol._images_loc
        bvk_q_cond = lib.condense('NP_absmax', q_cond, idx, idx)
        ovlp_mask = bvk_q_cond > direct_scf_tol
        # Remove diffused-diffused block
        if n_diffused > 0:
            diffused_mask = np.zeros_like(bvk_bas_mask)
            diffused_mask[:, n_compact:] = True
            diffused_mask = diffused_mask[bvk_bas_mask]
            ovlp_mask[diffused_mask[:, None] & diffused_mask] = False
        self.ovlp_mask = ovlp_mask.astype(np.int8)

        # mute rcut_threshold, divide basis into two sets only
        cell_lr_aft = _re_contract_cell(cell, self.ke_cutoff, -1, verbose=0)
        self.lr_aft = lr_aft = _LongRangeAFT(cell_lr_aft, kpts, self.omega,
                                             self.bvk_kmesh)
        lr_aft.ke_cutoff = self.ke_cutoff
        lr_aft.mesh = self.mesh
        lr_aft.eta = eta
        return self
Ejemplo n.º 35
0
def get_ontop_pair_density(ot,
                           rho,
                           ao,
                           oneCDMs,
                           twoCDM_amo,
                           ao2amo,
                           deriv=0,
                           non0tab=None):
    r''' Pi(r) = i(r)*j(r)*k(r)*l(r)*d_ijkl / 2
               = rho[0](r)*rho[1](r) + i(r)*j(r)*k(r)*l(r)*l_ijkl / 2

        Args:
            ot : on-top pair density functional object
                (its ``verbose`` and ``mol`` attributes are read here)
            rho : ndarray of shape (2,*,ngrids)
                contains spin density [and derivatives]
            ao : ndarray of shape (*, ngrids, nao)
                contains values of aos [and derivatives]
            oneCDMs : ndarray of shape (2, nao, nao)
                contains spin-separated 1-RDM
                (only used by the high-verbosity self-check)
            twoCDM_amo : ndarray of shape (mc.ncas, mc.ncas, mc.ncas, mc.ncas)
                contains spin-summed two-body cumulant density matrix in active space
            ao2amo : ndarray of shape (nao, ncas)
                molecular-orbital coefficients for active-space orbitals

        Kwargs:
            deriv : derivative order through which to calculate. Default is 0.
                deriv = 1 adds the three gradient components; deriv = 2
                additionally adds the off-top Laplacian.
            non0tab : as in pyscf.dft.gen_grid and pyscf.dft.numint

        Returns : ndarray of shape (*,ngrids)
            The on-top pair density and its derivatives if requested
            deriv = 0 : value (1d array)
            deriv = 1 : value, d/dx, d/dy, d/dz
            deriv = 2 : value, d/dx, d/dy, d/dz, d^2/d|r1-r2|^2_(r1=r2)

        Note:
            When ``ot.verbose > logger.DEBUG`` a slow, memory-hungry
            einsum-based reference is evaluated alongside the production
            formula and the norm of the difference is logged.
    '''
    # Fix dimensionality of rho and ao: promote to an explicit (length-1)
    # derivative axis so the code below can always index [component, ...]
    if rho.ndim == 2:
        rho = rho.reshape(rho.shape[0], 1, rho.shape[1])
    if ao.ndim == 2:
        ao = ao.reshape(1, ao.shape[0], ao.shape[1])

    # Debug code for ultra-slow, ultra-high-memory but very safe implementation
    if ot.verbose > logger.DEBUG:
        logger.debug(
            ot, 'Warning: memory-intensive cacheing of full 2RDM for testing '
            'purposes initiated; reduce verbosity to increase speed and memory efficiency'
        )
        twoRDM = represent_operator_in_basis(twoCDM_amo, ao2amo.conjugate().T)
        twoRDM = get_2RDM_from_2CDM(twoRDM, oneCDMs)

    # First cumulant and derivatives (chain rule! product rule!)
    t0 = (time.process_time(), time.time())
    Pi = np.zeros_like(rho[0])
    Pi[0] = rho[0, 0] * rho[1, 0]
    if deriv > 0:
        assert (rho.shape[1] >= 4), rho.shape
        assert (ao.shape[0] >= 4), ao.shape
        for ideriv in range(1, 4):
            Pi[ideriv] = (rho[0, ideriv] * rho[1, 0]
                          + rho[0, 0] * rho[1, ideriv])
    if deriv > 1:
        assert (rho.shape[1] >= 6), rho.shape
        assert (ao.shape[0] >= 10), ao.shape
        Pi[4] = -(rho[:, 1:4].sum(0).conjugate() *
                  rho[:, 1:4].sum(0)).sum(0) / 4
        Pi[4] += rho[0, 0] * (rho[1, 4] / 4 + rho[0, 5] * 2)
        Pi[4] += rho[1, 0] * (rho[0, 4] / 4 + rho[1, 5] * 2)
    t0 = logger.timer_debug1(ot, 'otpd first cumulant', *t0)

    # Second cumulant and derivatives (chain rule! product rule!)
    # dot, tensordot, and sum are hugely faster than np.einsum
    # but whether or when they actually multithread is unclear
    # Update 05/11/2020: ao is actually stored in row-major order
    # = (deriv,AOs,grids).
    # NOTE(review): the line above disagrees with the (*, ngrids, nao) layout
    # given in the docstring and used by the debug einsums below - confirm.
    grid2amo = _grid_ao2mo(ot.mol, ao, ao2amo, non0tab=non0tab)
    t0 = logger.timer(ot, 'otpd ao2mo', *t0)
    gridkern = np.zeros(grid2amo.shape + (grid2amo.shape[2], ),
                        dtype=grid2amo.dtype)
    gridkern[0] = grid2amo[0, :, :, np.newaxis] * grid2amo[
        0, :, np.newaxis, :]  # r_0ai,  r_0aj  -> r_0aij
    wrk0 = np.tensordot(gridkern[0], twoCDM_amo,
                        axes=2)  # r_0aij, P_ijkl -> P_0akl
    Pi[0] += (gridkern[0] * wrk0).sum((1, 2)) / 2  # r_0aij, P_0aij -> P_0a
    t0 = logger.timer_debug1(ot, 'otpd second cumulant 0th derivative', *t0)
    if ot.verbose > logger.DEBUG:
        logger.debug(
            ot,
            'Warning: slow einsum-based testing calculation of Pi initiated; '
            'reduce verbosity to increase speed and memory efficiency')
        test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[0], ao[0], ao[0],
                            ao[0]) / 2
        logger.debug(ot, "Pi, |tensordot_formula - einsum_formula| = %s",
                     linalg.norm(Pi[0] - test_Pi))
        # The label has no placeholder; the previous `.format(ideriv)` call
        # referenced a name that is unbound when deriv == 0 and crashed here.
        t0 = logger.timer(ot, 'otpd 0th derivative debug', *t0)
    if deriv > 0:
        for ideriv in range(1, 4):
            # Fourfold tensor symmetry ijkl = klij = jilk = lkji & product rule -> factor of 4
            gridkern[ideriv] = grid2amo[ideriv, :, :, np.newaxis] * grid2amo[
                0, :, np.newaxis, :]  # r_1ai,  r_0aj  -> r_1aij
            Pi[ideriv] += (gridkern[ideriv] * wrk0).sum(
                (1, 2)) * 2  # r_1aij, P_0aij -> P_1a
            t0 = logger.timer_debug1(
                ot, 'otpd second cumulant 1st derivative ({})'.format(ideriv),
                *t0)
            if ot.verbose > logger.DEBUG:
                logger.debug(
                    ot,
                    'Warning: slow einsum-based testing calculation of Pi\'s first derivatives initiated; '
                    'reduce verbosity to increase speed and memory efficiency')
                # Product rule: the derivative lands on each of the four
                # orbital factors in turn.
                test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[ideriv],
                                    ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,aj,ai,ak,al->a', twoRDM, ao[ideriv],
                                     ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,ak,ai,aj,al->a', twoRDM, ao[ideriv],
                                     ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,al,ai,aj,ak->a', twoRDM, ao[ideriv],
                                     ao[0], ao[0], ao[0]) / 2
                logger.debug(
                    ot,
                    "Pi derivative, |tensordot_formula - einsum_formula| = %s",
                    linalg.norm(Pi[ideriv] - test_Pi))
                t0 = logger.timer(
                    ot, 'otpd 1st derivative ({}) debug'.format(ideriv), *t0)
    if deriv > 1:  # The fifth slot is allocated to the "off-top Laplacian," i.e., nabla_(r1-r2)^2 Pi(r1,r2)|(r1=r2)
        # nabla_off^2 Pi = 1/2 d^ik_jl * ([nabla_r^2 phi_i] phi_j phi_k phi_l + {1 - p_jk - p_jl}[nabla_r phi_i . nabla_r phi_j] phi_k phi_l)
        # using four-fold symmetry a lot! be careful!
        if ot.verbose > logger.DEBUG:
            # Snapshot of the first-cumulant part only, for the second check
            test2_Pi = Pi[4].copy()
        # Indices of the xx, yy, zz second-derivative components
        XX, YY, ZZ = 4, 7, 9
        gridkern[4] = grid2amo[[XX, YY, ZZ], :, :, np.newaxis].sum(
            0) * grid2amo[0, :, np.newaxis, :]  # r_2ai, r_0aj -> r_2aij
        gridkern[4] += (grid2amo[1:4, :, :, np.newaxis] *
                        grid2amo[1:4, :, np.newaxis, :]).sum(
                            0)  # r_1ai, r_1aj -> r_2aij
        wrk1 = np.tensordot(gridkern[1:4], twoCDM_amo,
                            axes=2)  # r_1aij, P_ijkl -> P_1akl
        Pi[4] += (gridkern[4] * wrk0).sum((1, 2)) / 2  # r_2aij, P_0aij -> P_2a
        Pi[4] -= (
            (gridkern[1:4] + gridkern[1:4].transpose(0, 1, 3, 2)) * wrk1).sum(
                (0, 2, 3)) / 2  # r_1aij, P_1aij -> P_2a
        t0 = logger.timer(ot, 'otpd second cumulant off-top Laplacian', *t0)
        if ot.verbose > logger.DEBUG:
            logger.debug(
                ot,
                'Warning: slow einsum-based testing calculation of Pi\'s second derivatives initiated; '
                'reduce verbosity to increase speed and memory efficiency')
            X, Y, Z = 1, 2, 3
            # Reference 1: full 2RDM contracted with AO values
            test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[XX], ao[0],
                                ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[YY], ao[0],
                                 ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[ZZ], ao[0],
                                 ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X], ao[X],
                                 ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y], ao[Y],
                                 ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z], ao[Z],
                                 ao[0], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X], ao[0],
                                 ao[X], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y], ao[0],
                                 ao[Y], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z], ao[0],
                                 ao[Z], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X], ao[0],
                                 ao[0], ao[X]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y], ao[0],
                                 ao[0], ao[Y]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z], ao[0],
                                 ao[0], ao[Z]) / 2
            logger.debug(
                ot,
                'Pi off-top Laplacian, |tensordot formula - einsum_formula| = %s',
                linalg.norm(Pi[4] - test_Pi))

            # Reference 2: cumulant contracted with grid-resolved active MOs,
            # added on top of the first-cumulant snapshot taken above
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[XX], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[YY], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[ZZ], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[X], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[Y], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[Z], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[0], grid2amo[X],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[0], grid2amo[Y],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[0], grid2amo[Z],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[0], grid2amo[0],
                                  grid2amo[X]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[0], grid2amo[0],
                                  grid2amo[Y]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[0], grid2amo[0],
                                  grid2amo[Z]) / 2
            logger.debug(
                ot,
                'Pi off-top Laplacian, testing second cumulant only |tensordot formula - einsum_formula| = %s',
                linalg.norm(Pi[4] - test2_Pi))

            t0 = logger.timer(ot, 'otpd off-top Laplacian debug', *t0)

    # Unfix dimensionality of Pi: drop the derivative axis when only the value
    # was computed.  rho and ao are local names here, so reshaping them back
    # would not reach the caller (and the ao reshape could fail when ao
    # carries derivative components that rho does not).
    if Pi.shape[0] == 1:
        Pi = Pi.reshape(Pi.shape[1])

    return Pi
Ejemplo n.º 36
0
def get_jk_favorj(sgx,
                  dm,
                  hermi=1,
                  with_j=True,
                  with_k=True,
                  direct_scf_tol=1e-13):
    """Build Coulomb (J) and exchange (K) matrices on the sgx grid.

    Args:
        sgx : object providing ``mol``, ``grids``, ``grids_thrd``, ``debug``,
            ``max_memory`` and ``blockdim``.
        dm : density matrix or stack of density matrices; the last axis
            gives ``nao``.
        hermi : when equal to 1 (and ``with_k`` is set) the K matrices are
            symmetrized before being returned.
        with_j, with_k : select which of J / K are accumulated.
        direct_scf_tol : forwarded to ``_gen_jk_direct``.

    Returns:
        ``(vj, vk)``, each reshaped to the shape of ``dm``.  A matrix that
        was not requested is returned as zeros.
    """
    # time.clock() was removed in Python 3.8; process_time() is the
    # CPU-time replacement.
    t0 = time.process_time(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol)

    # for basis set to shell: rao_loc[ao_index] -> index of the owning shell
    intor = mol._add_suffix('int3c2e')
    fakemol = gto.fakemol_for_charges(grids.coords)
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      fakemol._atm, fakemol._bas, fakemol._env)
    ao_loc = moleintor.make_loc(bas, intor)
    rao_loc = numpy.zeros((nao), dtype=int)
    for i in range(mol.nbas):
        for j in range(ao_loc[i], ao_loc[i + 1]):
            rao_loc[j] = i

    # Numerical overlap on the grid, accumulated block by block
    sn = numpy.zeros((nao, nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk,
                                         max_memory * 1e6 / 8 / nao**2))))
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]
        sn += lib.dot(ao.T, wao)

    # Project the density matrices with sn^{-1} . ovlp
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            gmaxfg = numpy.amax(numpy.absolute(fg[i]), axis=1)
            gmaxwao_v = numpy.amax(numpy.absolute(ao), axis=1)
            gmaxtt = gmaxfg * gmaxwao_v
            # NOTE(review): numpy.any without an axis yields one scalar, so
            # this switches the whole block on or off rather than screening
            # per grid point; gmaxtt is non-negative so the second test can
            # never fire.  Kept as-is to preserve the existing screening.
            mask |= numpy.any(gmaxtt > 1e-7)
            mask |= numpy.any(gmaxtt < -1e-7)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        # screening u by value of grids, then v by ovlp; the surviving shell
        # list is stored on mol as _bvv.
        # (An earlier, tighter density-matrix-based screening pass used to sit
        # before this one; its result was always overwritten here, so it has
        # been dropped.)
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            if len(osi) != 0:
                vsh = numpy.unique(rao_loc[osi])
                mol._bvv = vsh

        # Drop grid points whose projected density is below sgx.grids_thrd
        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i] > gthrd, axis=1)
            mask |= numpy.any(fg[i] < -gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        # tnuc accumulates (CPU, wall) time spent in the integral kernels
        if sgx.debug:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            jpart, gv = batch_jk(mol, coords, rhog, fg)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        # Release the per-block intermediates before the next iteration
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(
        sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
        'for tensor contraction (%.2f, %.2f)', tnuc[0], tnuc[1], tdot[0],
        tdot[1])

    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1)) * .5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
Ejemplo n.º 37
0
def _make_eris_outcore(mycc, mo_coeff=None):
    """Transform the two-electron integrals block by block into HDF5 storage.

    Args:
        mycc : object providing ``nocc`` and ``nmo`` (alpha/beta pairs),
            ``mol`` and ``direct``.
        mo_coeff : forwarded to ``eris._common_init_``.

    Returns:
        A ``_ChemistsERIs`` whose integral blocks are datasets in the
        temporary file ``eris.feri``.  Lower-case letters index alpha
        orbitals and upper-case letters beta orbitals; o/O are occupied and
        v/V virtual.  The vvvv / VVVV / vvVV blocks are only generated when
        ``mycc.direct`` is false.
    """
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    nocca, noccb = mycc.nocc
    nmoa, nmob = mycc.nmo
    nvira, nvirb = nmoa-nocca, nmob-noccb

    moa = eris.mo_coeff[0]
    mob = eris.mo_coeff[1]
    nmoa = moa.shape[1]
    nmob = mob.shape[1]

    orboa = moa[:,:nocca]
    orbob = mob[:,:noccb]
    orbva = moa[:,nocca:]
    orbvb = mob[:,noccb:]
    eris.feri = lib.H5TmpFile()
    # alpha-alpha blocks (the ovvv-type blocks keep the last virtual pair in
    # packed lower-triangular form)
    eris.oooo = eris.feri.create_dataset('oooo', (nocca,nocca,nocca,nocca), 'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocca,nvira,nocca,nocca), 'f8')
    eris.ovov = eris.feri.create_dataset('ovov', (nocca,nvira,nocca,nvira), 'f8')
    eris.oovv = eris.feri.create_dataset('oovv', (nocca,nocca,nvira,nvira), 'f8')
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocca,nvira,nvira,nocca), 'f8')
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocca,nvira,nvira*(nvira+1)//2), 'f8')
    # beta-beta blocks
    eris.OOOO = eris.feri.create_dataset('OOOO', (noccb,noccb,noccb,noccb), 'f8')
    eris.OVOO = eris.feri.create_dataset('OVOO', (noccb,nvirb,noccb,noccb), 'f8')
    eris.OVOV = eris.feri.create_dataset('OVOV', (noccb,nvirb,noccb,nvirb), 'f8')
    eris.OOVV = eris.feri.create_dataset('OOVV', (noccb,noccb,nvirb,nvirb), 'f8')
    eris.OVVO = eris.feri.create_dataset('OVVO', (noccb,nvirb,nvirb,noccb), 'f8')
    eris.OVVV = eris.feri.create_dataset('OVVV', (noccb,nvirb,nvirb*(nvirb+1)//2), 'f8')
    # alpha-beta blocks
    eris.ooOO = eris.feri.create_dataset('ooOO', (nocca,nocca,noccb,noccb), 'f8')
    eris.ovOO = eris.feri.create_dataset('ovOO', (nocca,nvira,noccb,noccb), 'f8')
    eris.ovOV = eris.feri.create_dataset('ovOV', (nocca,nvira,noccb,nvirb), 'f8')
    eris.ooVV = eris.feri.create_dataset('ooVV', (nocca,nocca,nvirb,nvirb), 'f8')
    eris.ovVO = eris.feri.create_dataset('ovVO', (nocca,nvira,nvirb,noccb), 'f8')
    eris.ovVV = eris.feri.create_dataset('ovVV', (nocca,nvira,nvirb*(nvirb+1)//2), 'f8')
    # beta-alpha blocks
    eris.OVoo = eris.feri.create_dataset('OVoo', (noccb,nvirb,nocca,nocca), 'f8')
    eris.OOvv = eris.feri.create_dataset('OOvv', (noccb,noccb,nvira,nvira), 'f8')
    eris.OVvo = eris.feri.create_dataset('OVvo', (noccb,nvirb,nvira,nocca), 'f8')
    eris.OVvv = eris.feri.create_dataset('OVvv', (noccb,nvirb,nvira*(nvira+1)//2), 'f8')
    # The all-virtual datasets (vvvv, VVVV, vvVV) are not pre-allocated here;
    # they are written by ao2mo at the end when mycc.direct is false.

    # time.clock() was removed in Python 3.8; process_time() is the
    # CPU-time replacement.
    cput1 = time.process_time(), time.time()
    mol = mycc.mol
    # <ij||pq> = <ij|pq> - <ij|qp> = (ip|jq) - (iq|jp)
    # Each section transforms (occ, all | all, all) into a scratch file, then
    # unpacks one occupied index at a time and scatters slices of the buffer
    # into the destination datasets.
    tmpf = lib.H5TmpFile()
    if nocca > 0:
        ao2mo.general(mol, (orboa,moa,moa,moa), tmpf, 'aa')
        buf = np.empty((nmoa,nmoa,nmoa))
        for i in range(nocca):
            lib.unpack_tril(tmpf['aa'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.oooo[i] = buf[:nocca,:nocca,:nocca]
            eris.ovoo[i] = buf[nocca:,:nocca,:nocca]
            eris.ovov[i] = buf[nocca:,:nocca,nocca:]
            eris.oovv[i] = buf[:nocca,nocca:,nocca:]
            eris.ovvo[i] = buf[nocca:,nocca:,:nocca]
            eris.ovvv[i] = lib.pack_tril(buf[nocca:,nocca:,nocca:])
        del tmpf['aa']

    if noccb > 0:
        buf = np.empty((nmob,nmob,nmob))
        ao2mo.general(mol, (orbob,mob,mob,mob), tmpf, 'bb')
        for i in range(noccb):
            lib.unpack_tril(tmpf['bb'][i*nmob:(i+1)*nmob], out=buf)
            eris.OOOO[i] = buf[:noccb,:noccb,:noccb]
            eris.OVOO[i] = buf[noccb:,:noccb,:noccb]
            eris.OVOV[i] = buf[noccb:,:noccb,noccb:]
            eris.OOVV[i] = buf[:noccb,noccb:,noccb:]
            eris.OVVO[i] = buf[noccb:,noccb:,:noccb]
            eris.OVVV[i] = lib.pack_tril(buf[noccb:,noccb:,noccb:])
        del tmpf['bb']

    if nocca > 0:
        buf = np.empty((nmoa,nmob,nmob))
        ao2mo.general(mol, (orboa,moa,mob,mob), tmpf, 'ab')
        for i in range(nocca):
            lib.unpack_tril(tmpf['ab'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.ooOO[i] = buf[:nocca,:noccb,:noccb]
            eris.ovOO[i] = buf[nocca:,:noccb,:noccb]
            eris.ovOV[i] = buf[nocca:,:noccb,noccb:]
            eris.ooVV[i] = buf[:nocca,noccb:,noccb:]
            eris.ovVO[i] = buf[nocca:,noccb:,:noccb]
            eris.ovVV[i] = lib.pack_tril(buf[nocca:,noccb:,noccb:])
        del tmpf['ab']

    if noccb > 0:
        buf = np.empty((nmob,nmoa,nmoa))
        ao2mo.general(mol, (orbob,mob,moa,moa), tmpf, 'ba')
        for i in range(noccb):
            lib.unpack_tril(tmpf['ba'][i*nmob:(i+1)*nmob], out=buf)
            eris.OVoo[i] = buf[noccb:,:nocca,:nocca]
            eris.OOvv[i] = buf[:noccb,nocca:,nocca:]
            eris.OVvo[i] = buf[noccb:,nocca:,:nocca]
            eris.OVvv[i] = lib.pack_tril(buf[noccb:,nocca:,nocca:])
        del tmpf['ba']
    # Drop the reference to the scratch buffer
    buf = None
    cput1 = logger.timer_debug1(mycc, 'transforming oopq, ovpq', *cput1)

    if not mycc.direct:
        ao2mo.full(mol, orbva, eris.feri, dataname='vvvv')
        ao2mo.full(mol, orbvb, eris.feri, dataname='VVVV')
        ao2mo.general(mol, (orbva,orbva,orbvb,orbvb), eris.feri, dataname='vvVV')
        eris.vvvv = eris.feri['vvvv']
        eris.VVVV = eris.feri['VVVV']
        eris.vvVV = eris.feri['vvVV']
        cput1 = logger.timer_debug1(mycc, 'transforming vvvv', *cput1)

    return eris
Ejemplo n.º 38
0
def get_k_e1_kpts(mydf,
                  dm_kpts,
                  kpts=np.zeros((1, 3)),
                  kpts_band=None,
                  exxdiv=None):
    '''Derivatives of exchange (K) AO matrix at sampled k-points.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``grids``, ``_numint``
            and ``max_memory``.
        dm_kpts : density matrices at the k-points.  If it carries
            ``mo_coeff`` / ``mo_occ`` attributes and holds a single set, the
            occupied orbitals are used in place of the density matrix.
        kpts : k-points at which ``dm_kpts`` is given (read-only here).
        kpts_band : optional k-points at which the result is evaluated;
            defaults to ``kpts``.
        exxdiv : passed to ``tools.get_coulG`` unless it is 'ewald' or None,
            in which case ``False`` is passed instead.

    Returns:
        ndarray whose leading axis of length 3 indexes the derivative
        component; each component is formatted by ``_format_jks``.

    Raises:
        NotImplementedError: ``exxdiv == 'ewald'`` together with a non-zero
            ``cell.omega``.
    '''

    cell = mydf.cell
    # Ewald correction has no contribution to nuclear gradient unless range
    # separated Coulomb is used (when cell.omega != 0 the madelung constant
    # has a non-zero derivative).  That gradient correction is not added to
    # the vk matrix, so reject the combination up front rather than after the
    # whole FFT loop has run.
    if exxdiv == 'ewald' and cell.omega != 0:
        raise NotImplementedError("Range Separated Coulomb")

    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    # Quadrature weight per grid point, averaged over k-points
    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords

    # ao1_kpts: AO values and first derivatives at the band k-points;
    # ao2_kpts: AO values only at the sampled k-points.  Both are stored
    # transposed so the grid index is last.
    if input_band is None:
        ao2_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts, deriv=1)
        ]
        ao1_kpts = ao2_kpts
        ao2_kpts = [ao2_kpt[0] for ao2_kpt in ao2_kpts]
    else:
        ao2_kpts = [
            np.asarray(ao.T, order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)
        ]
        ao1_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C') for ao in
            mydf._numint.eval_ao(cell, coords, kpts=kpts_band, deriv=1)
        ]

    if mo_coeff is not None and nset == 1:
        # Occupation-weighted occupied orbitals evaluated on the grid
        mo_coeff = [
            mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
            for k, occ in enumerate(mo_occ)
        ]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    # Memory still available for the pair-density blocks.  The memory in use
    # is discounted exactly once (it used to be subtracted a second time when
    # sizing blksize, which only made the blocks needlessly small).
    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    blksize = int(
        min(nao,
            max(1, max_memory * 1e6 / 16 / 4 / 3 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s  blksize %d',
                  max_memory, blksize)

    vR_dm = np.empty((3, nset, nao, ngrids), dtype=vk_kpts.dtype)

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # For 'ewald' (and None) the Coulomb kernel is requested without
            # any exxdiv treatment.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, exxdiv, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process blksize rows of the first AO index at a time
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('aig,jg->aijg',
                                 ao1T[1:, p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(3, p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('aijg,jg->aig',
                              vR,
                              ao_dms[i],
                              out=vR_dm[:, i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[:, i, k1] -= weight * np.einsum(
                    'aig,jg->aij', vR_dm[:, i], ao1T[0])
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1)

    vk_kpts = np.asarray(
        [_format_jks(vk, dm_kpts, input_band, kpts) for vk in vk_kpts])
    return vk_kpts
Ejemplo n.º 39
0
def transform_integrals_outcore(myadc):
    """Transform two-electron integrals to the MO basis and store them on disk.

    Two orbital sets are read from ``myadc.mo_coeff`` (index 0 -> ``_a``,
    index 1 -> ``_b``; presumably alpha and beta spin).  Each set is split
    into occupied and virtual columns at ``myadc._nocc[0]`` / ``myadc._nocc[1]``.

    Dataset names spell the orbital space of the four integral indices in
    the order the coefficient matrices are handed to ``ao2mo.general``:
    ``o``/``v`` refer to the ``_a`` set and ``O``/``V`` to the ``_b`` set.
    The ``*vvv``-type datasets keep the last two (virtual) indices packed
    as a lower triangle.

    When ``myadc.method`` is ``"adc(2)-x"`` or ``"adc(3)"`` the all-virtual
    integrals are generated in chunks over the first index and attached as
    the lists ``vvvv_p``, ``VVVV_p``, ``vVvV_p`` and ``VvVv_p``.

    Args:
        myadc: ADC object providing ``mo_coeff``, ``_nocc``, ``mol``,
            ``method``, ``max_memory``, ``stdout`` and ``verbose``.

    Returns:
        An attribute container holding the HDF5 temporary file (``feri1``),
        its datasets, and (for the methods above) the chunked vvvv lists.
    """
    cput0 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(myadc.stdout, myadc.verbose)

    mo_a = myadc.mo_coeff[0]
    mo_b = myadc.mo_coeff[1]

    occ_a = mo_a[:, :myadc._nocc[0]]
    occ_b = mo_b[:, :myadc._nocc[1]]
    vir_a = mo_a[:, myadc._nocc[0]:]
    vir_b = mo_b[:, myadc._nocc[1]:]

    nocc_a = occ_a.shape[1]
    nocc_b = occ_b.shape[1]
    nvir_a = vir_a.shape[1]
    nvir_b = vir_b.shape[1]

    # Number of (c >= d) virtual pairs, i.e. the packed lower-triangle length.
    nvpair_a = nvir_a * (nvir_a + 1) // 2
    nvpair_b = nvir_b * (nvir_b + 1) // 2

    # A bare function object is used purely as a mutable attribute holder;
    # kept as-is so the returned object type does not change for callers.
    eris = lambda: None

    feri = eris.feri1 = lib.H5TmpFile()

    def new_dataset(name, shape, chunks=None):
        # Forward ``chunks`` only when one is requested so that unchunked
        # datasets are created exactly as with a plain three-argument call.
        extra = {} if chunks is None else {'chunks': chunks}
        return feri.create_dataset(name, shape, 'f8', **extra)

    # --- both index pairs from the `_a` set ---------------------------------
    eris.oooo = new_dataset('oooo', (nocc_a, nocc_a, nocc_a, nocc_a))
    eris.oovv = new_dataset('oovv', (nocc_a, nocc_a, nvir_a, nvir_a),
                            chunks=(nocc_a, nocc_a, 1, nvir_a))
    eris.ovoo = new_dataset('ovoo', (nocc_a, nvir_a, nocc_a, nocc_a),
                            chunks=(nocc_a, 1, nocc_a, nocc_a))
    eris.ovvo = new_dataset('ovvo', (nocc_a, nvir_a, nvir_a, nocc_a),
                            chunks=(nocc_a, 1, nvir_a, nocc_a))
    eris.ovvv = new_dataset('ovvv', (nocc_a, nvir_a, nvpair_a))

    # --- both index pairs from the `_b` set ---------------------------------
    eris.OOOO = new_dataset('OOOO', (nocc_b, nocc_b, nocc_b, nocc_b))
    eris.OOVV = new_dataset('OOVV', (nocc_b, nocc_b, nvir_b, nvir_b),
                            chunks=(nocc_b, nocc_b, 1, nvir_b))
    eris.OVOO = new_dataset('OVOO', (nocc_b, nvir_b, nocc_b, nocc_b),
                            chunks=(nocc_b, 1, nocc_b, nocc_b))
    eris.OVVO = new_dataset('OVVO', (nocc_b, nvir_b, nvir_b, nocc_b),
                            chunks=(nocc_b, 1, nvir_b, nocc_b))
    eris.OVVV = new_dataset('OVVV', (nocc_b, nvir_b, nvpair_b))

    # --- first pair `_a`, second pair `_b` ----------------------------------
    eris.ooOO = new_dataset('ooOO', (nocc_a, nocc_a, nocc_b, nocc_b))
    eris.ooVV = new_dataset('ooVV', (nocc_a, nocc_a, nvir_b, nvir_b),
                            chunks=(nocc_a, nocc_a, 1, nvir_b))
    eris.ovOO = new_dataset('ovOO', (nocc_a, nvir_a, nocc_b, nocc_b),
                            chunks=(nocc_a, 1, nocc_b, nocc_b))
    eris.ovVO = new_dataset('ovVO', (nocc_a, nvir_a, nvir_b, nocc_b),
                            chunks=(nocc_a, 1, nvir_b, nocc_b))
    eris.ovVV = new_dataset('ovVV', (nocc_a, nvir_a, nvpair_b))

    # --- first pair `_b`, second pair `_a` (no 'OOoo' dataset is stored) ----
    eris.OOvv = new_dataset('OOvv', (nocc_b, nocc_b, nvir_a, nvir_a),
                            chunks=(nocc_b, nocc_b, 1, nvir_a))
    eris.OVoo = new_dataset('OVoo', (nocc_b, nvir_b, nocc_a, nocc_a),
                            chunks=(nocc_b, 1, nocc_a, nocc_a))
    eris.OVvo = new_dataset('OVvo', (nocc_b, nvir_b, nvir_a, nocc_a),
                            chunks=(nocc_b, 1, nvir_a, nocc_a))
    eris.OVvv = new_dataset('OVvv', (nocc_b, nvir_b, nvpair_a))

    cput1 = logger.process_clock(), logger.perf_counter()
    mol = myadc.mol
    tmpf = lib.H5TmpFile()

    # Each transformation is skipped when its leading occupied space is empty.
    if nocc_a > 0:
        _store_occ_blocks(mol, tmpf, 'aa', occ_a, mo_a, mo_a, nocc_a,
                          eris.oooo, eris.ovoo, eris.oovv, eris.ovvo,
                          eris.ovvv)

    if nocc_b > 0:
        _store_occ_blocks(mol, tmpf, 'bb', occ_b, mo_b, mo_b, nocc_b,
                          eris.OOOO, eris.OVOO, eris.OOVV, eris.OVVO,
                          eris.OVVV)

    if nocc_a > 0:
        _store_occ_blocks(mol, tmpf, 'ab', occ_a, mo_a, mo_b, nocc_b,
                          eris.ooOO, eris.ovOO, eris.ooVV, eris.ovVO,
                          eris.ovVV)

    if nocc_b > 0:
        _store_occ_blocks(mol, tmpf, 'ba', occ_b, mo_b, mo_a, nocc_a,
                          None, eris.OVoo, eris.OOvv, eris.OVvo,
                          eris.OVvv)

    cput1 = logger.timer_debug1(myadc, 'transforming oopq, ovpq', *cput1)

    ############### forming eris_vvvv ########################################

    if myadc.method in ("adc(2)-x", "adc(3)"):

        cput2 = logger.process_clock(), logger.perf_counter()

        # Use a quarter of the remaining memory budget for these blocks.
        avail_mem = (myadc.max_memory - lib.current_memory()[0]) * 0.25
        # Size of one nvir_a**3 slab of 8-byte values, in units of 1e6 bytes.
        # The chunk size derived from it is reused for every spin case below.
        vvv_mem = (nvir_a**3) * 8 / 1e6

        # Always make progress, even when the budget is exhausted.
        chnk_size = max(int(avail_mem / vvv_mem), 1)

        eris.vvvv_p = _same_spin_vvvv_chunks(mol, vir_a, chnk_size, avail_mem)
        eris.VVVV_p = _same_spin_vvvv_chunks(mol, vir_b, chnk_size, avail_mem)
        eris.vVvV_p = _mixed_spin_vvvv_chunks(mol, vir_a, vir_b, chnk_size,
                                              avail_mem)
        eris.VvVv_p = _mixed_spin_vvvv_chunks(mol, vir_b, vir_a, chnk_size,
                                              avail_mem)

        cput2 = logger.timer_debug1(myadc, 'transforming vvvv', *cput2)

    log.timer('ADC outcore integral transformation', *cput0)
    return eris


def _store_occ_blocks(mol, tmpf, label, occ_x, mo_x, mo_y, nocc_y,
                      oooo, ovoo, oovv, ovvo, ovvv):
    """Transform (occ_x, mo_x, mo_y, mo_y) integrals and scatter the sub-blocks.

    The transformed integrals are written to ``tmpf[label]`` with the last
    index pair stored as a packed lower triangle.  For every occupied index
    ``i`` the matching ``nmo_x`` rows are unpacked into a dense
    ``(nmo_x, nmo_y, nmo_y)`` buffer whose occupied/virtual slices are
    copied into the target datasets.  ``oooo`` may be ``None`` to skip that
    block.  The scratch dataset is deleted from ``tmpf`` afterwards.
    """
    nocc_x = occ_x.shape[1]
    nmo_x = mo_x.shape[1]
    nmo_y = mo_y.shape[1]

    ao2mo.general(mol, (occ_x, mo_x, mo_y, mo_y), tmpf, label)
    buf = np.empty((nmo_x, nmo_y, nmo_y))
    for i in range(nocc_x):
        lib.unpack_tril(tmpf[label][i * nmo_x:(i + 1) * nmo_x], out=buf)
        if oooo is not None:
            oooo[i] = buf[:nocc_x, :nocc_y, :nocc_y]
        ovoo[i] = buf[nocc_x:, :nocc_y, :nocc_y]
        oovv[i] = buf[:nocc_x, nocc_y:, nocc_y:]
        ovvo[i] = buf[nocc_x:, nocc_y:, :nocc_y]
        # Re-pack the all-virtual trailing pair to halve the storage.
        ovvv[i] = lib.pack_tril(buf[nocc_x:, nocc_y:, nocc_y:])
    del tmpf[label]


def _load_transformed_eri(mol, mo_coeffs, max_memory):
    """Run an out-of-core transformation through a scratch file and load it.

    The scratch file's descriptor is closed right away and the file itself
    is removed once the data has been read, so neither leaks per chunk.
    """
    import os

    fd, path = tempfile.mkstemp()
    # mkstemp returns an *open* descriptor; only the path is needed here.
    os.close(fd)
    try:
        ao2mo.outcore.general(mol, mo_coeffs,
                              path,
                              max_memory=max_memory,
                              ioblk_size=100,
                              compact=False)
        # NOTE(review): assumes read_dataset returns the data fully loaded
        # in memory (the result is reshaped immediately by the callers) and
        # does not keep the file open - confirm against radc_ao2mo.
        return radc_ao2mo.read_dataset(path, 'eri_mo')
    finally:
        os.remove(path)


def _same_spin_vvvv_chunks(mol, vir, chnk_size, max_memory):
    """Build antisymmetrized same-spin vvvv blocks, chunked over the first index.

    Each chunk is reordered with ``transpose(0, 2, 1, 3)``, antisymmetrized
    in its last two indices, and reduced to the strictly-lower-triangular
    pairs of those indices before being handed to ``write_dataset``.
    """
    nvir = vir.shape[1]
    pair_idx = np.tril_indices(nvir, k=-1)

    chunks = []
    for p in range(0, nvir, chnk_size):
        # Slicing past the end is safe, so the last chunk may be shorter.
        orb_slice = vir[:, p:p + chnk_size]

        blk = _load_transformed_eri(mol, (orb_slice, vir, vir, vir),
                                    max_memory)
        blk = blk.reshape(orb_slice.shape[1], nvir, nvir, nvir)
        blk = np.ascontiguousarray(blk.transpose(0, 2, 1, 3))
        blk -= np.ascontiguousarray(blk.transpose(0, 1, 3, 2))
        blk = blk[:, :, pair_idx[0], pair_idx[1]]

        stored = radc_ao2mo.write_dataset(blk)
        del blk  # release the dense block before the next chunk is built
        chunks.append(stored)
    return chunks


def _mixed_spin_vvvv_chunks(mol, vir_x, vir_y, chnk_size, max_memory):
    """Build mixed-spin vvvv blocks (vir_x, vir_x, vir_y, vir_y), chunked over index 0.

    Each chunk is reordered with ``transpose(0, 2, 1, 3)`` and flattened to
    ``(chunk, nvir_y, nvir_x * nvir_y)`` before being handed to
    ``write_dataset``.
    """
    nvir_x = vir_x.shape[1]
    nvir_y = vir_y.shape[1]

    chunks = []
    for p in range(0, nvir_x, chnk_size):
        # Slicing past the end is safe, so the last chunk may be shorter.
        orb_slice = vir_x[:, p:p + chnk_size]

        blk = _load_transformed_eri(mol, (orb_slice, vir_x, vir_y, vir_y),
                                    max_memory)
        blk = blk.reshape(orb_slice.shape[1], nvir_x, nvir_y, nvir_y)
        blk = np.ascontiguousarray(blk.transpose(0, 2, 1, 3))
        blk = blk.reshape(-1, nvir_y, nvir_x * nvir_y)

        stored = radc_ao2mo.write_dataset(blk)
        del blk  # release the dense block before the next chunk is built
        chunks.append(stored)
    return chunks