Ejemplo n.º 1
0
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp',
                                      hermi=1,
                                      kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16,
                           int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16,
                           int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR,
                    j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                       len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell,
                                            Gv[p0:p1],
                                            shls_slice,
                                            aosym,
                                            b,
                                            gxyz[p0:p1],
                                            Gvbase,
                                            kpt,
                                            adapted_kptjs,
                                            out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c,
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])
Ejemplo n.º 2
0
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, gs)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        # Note large difference may be found in results between the CD/eig treatments.
        # In some systems, small integral errors can lead to different treatments of
        # linear dependency which can be observed in the total energy/orbital energy
        # around 4th decimal place.
        #        try:
        #            j2c = scipy.linalg.cholesky(j2c, lower=True)
        #            j2ctag = 'CD'
        #        except scipy.linalg.LinAlgError as e:
        #
        # Abandon CD treatment for better numerical stablity
        w, v = scipy.linalg.eigh(j2c)
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  uniq_kptji_id, w[-1] / w[0],
                  numpy.count_nonzero(w < mydf.linear_dep_threshold))
        v = v[:, w > mydf.linear_dep_threshold].T.conj()
        v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
        j2c = v
        j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell,
                                            Gv[p0:p1],
                                            shls_slice,
                                            aosym,
                                            b,
                                            gxyz[p0:p1],
                                            Gvbase,
                                            kpt,
                                            adapted_kptjs,
                                            out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c,
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        del (feri['j2c/%d' % uniq_kptji_id])
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v
Ejemplo n.º 3
0
def cholesky_eri(mol,
                 auxbasis='weigend+etb',
                 auxmol=None,
                 int3c='int3c2e',
                 aosym='s2ij',
                 int2c='int2c2e',
                 comp=1,
                 max_memory=MAX_MEMORY,
                 verbose=0,
                 fauxe2=aux_e2):
    '''
    Returns:
        2D array of (naux,nao*(nao+1)/2) in C-contiguous
    '''
    from pyscf.df.outcore import _guess_shell_ranges
    assert (comp == 1)
    t0 = (time.clock(), time.time())
    log = logger.new_logger(mol, verbose)
    if auxmol is None:
        auxmol = addons.make_auxmol(mol, auxbasis)

    j2c = auxmol.intor(int2c, hermi=1)
    try:
        low = scipy.linalg.cholesky(j2c, lower=True)
        tag = 'cd'
    except scipy.linalg.LinAlgError:
        w, v = scipy.linalg.eigh(j2c)
        idx = w > LINEAR_DEP_THR
        low = (v[:, idx] / numpy.sqrt(w[idx]))
        v = None
        tag = 'eig'
    j2c = None
    naoaux, naux = low.shape
    log.debug('size of aux basis %d', naux)
    t1 = log.timer_debug1('2c2e', *t0)

    int3c = gto.moleintor.ascint3(mol._add_suffix(int3c))
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      auxmol._atm, auxmol._bas, auxmol._env)
    ao_loc = gto.moleintor.make_loc(bas, int3c)
    nao = ao_loc[mol.nbas]

    if aosym == 's1':
        nao_pair = nao * nao
    else:
        nao_pair = nao * (nao + 1) // 2

    cderi = numpy.empty((naux, nao_pair))

    max_words = max_memory * .98e6 / 8 - low.size - cderi.size
    # Divide by 3 because scipy.linalg.solve may create a temporary copy for
    # ints and return another copy for results
    buflen = min(max(int(max_words / naoaux / comp / 3), 8), nao_pair)
    shranges = _guess_shell_ranges(mol, buflen, aosym)
    log.debug1('shranges = %s', shranges)

    cintopt = gto.moleintor.make_cintopt(atm, bas, env, int3c)
    bufs1 = numpy.empty((comp * max([x[2] for x in shranges]), naoaux))
    bufs2 = numpy.empty_like(bufs1)

    p1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug('int3c2e [%d/%d], AO [%d:%d], nrow = %d', istep + 1,
                  len(shranges), *sh_range)
        bstart, bend, nrow = sh_range
        shls_slice = (bstart, bend, 0, mol.nbas, mol.nbas,
                      mol.nbas + auxmol.nbas)
        ints = gto.moleintor.getints3c(int3c,
                                       atm,
                                       bas,
                                       env,
                                       shls_slice,
                                       comp,
                                       aosym,
                                       ao_loc,
                                       cintopt,
                                       out=bufs1)

        if ints.ndim == 3 and ints.flags.f_contiguous:
            ints = lib.transpose(ints.T, axes=(0, 2, 1),
                                 out=bufs2).reshape(naoaux, -1)
            bufs1, bufs2 = bufs2, bufs1
        else:
            ints = ints.reshape((-1, naoaux)).T

        p0, p1 = p1, p1 + nrow
        if tag == 'cd':
            if ints.flags.c_contiguous:
                ints = lib.transpose(ints, out=bufs2).T
                bufs1, bufs2 = bufs2, bufs1
            dat = scipy.linalg.solve_triangular(low,
                                                ints,
                                                lower=True,
                                                overwrite_b=True,
                                                check_finite=False)
            if dat.flags.f_contiguous:
                dat = lib.transpose(dat.T, out=bufs2)
            cderi[:, p0:p1] = dat
        else:
            dat = numpy.ndarray((naux, ints.shape[1]), buffer=bufs2)
            cderi[:, p0:p1] = lib.dot(low.T, ints, c=dat)
        dat = ints = None

    log.timer('cholesky_eri', *t0)
    return cderi
Ejemplo n.º 4
0
    def make_kpt(uniq_kptji_id, cholesky_j2c):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = Gaux.T.real.copy('C')
        kLI = Gaux.T.imag.copy('C')

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty((nkptj,buflen*Gblksize), dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = [fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T for i in range(nsegs)]
                    v = fuse(numpy.vstack(v))
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                    v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])
Ejemplo n.º 5
0
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:],
                                    1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1,
                               j3cR[k][naux:], j3cI[k][naux:], 1)

            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1],
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v
Ejemplo n.º 6
0
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v = v[:, w > mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v
            j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell,
                                            Gv[p0:p1],
                                            shls_slice,
                                            aosym,
                                            b,
                                            gxyz[p0:p1],
                                            Gvbase,
                                            kpt,
                                            adapted_kptjs,
                                            out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c,
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        del (feri['j2c/%d' % uniq_kptji_id])
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v
Ejemplo n.º 7
0
Archivo: df.py Proyecto: zwang123/pyscf
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                            aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                            vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                                  wcoulG[p0:p1][G0idx], SI_on_z)
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:,None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                            tmp = aux = vG_mod

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d'%(ji,istep)] = v

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])
Ejemplo n.º 8
0
Archivo: df.py Proyecto: chrinide/pyscf
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])
Ejemplo n.º 9
0
def _make_j3c(mydf, mol, auxmol):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    atm, bas, env, ao_loc = incore._env_and_aoloc('cint3c2e_sph', mol, auxmol)
    nao = ao_loc[mol.nbas]
    naux = ao_loc[-1] - nao
    nao_pair = nao * (nao+1) // 2
    cintopt = gto.moleintor.make_cintopt(atm, bas, env, 'cint3c2e_sph')
    if mydf.approx_sr_level == 0:
        get_Lpq = _make_Lpq(mydf, mol, auxmol)
    else:
        get_Lpq = _make_Lpq_atomic_approx(mydf, mol, auxmol)

    feri = h5py.File(mydf._cderi)
    chunks = (min(256,naux), min(256,nao_pair)) # 512K
    feri.create_dataset('j3c', (naux,nao_pair), 'f8', chunks=chunks)
    feri.create_dataset('Lpq', (naux,nao_pair), 'f8', chunks=chunks)

    def save(label, dat, col0, col1):
        feri[label][:,col0:col1] = dat

    Gv, Gvbase, kws = non_uniform_kgrids(mydf.gs)
    nxyz = [i*2 for i in mydf.gs]
    gxyz = lib.cartesian_prod([range(i) for i in nxyz])
    kk = numpy.einsum('ki,ki->k', Gv, Gv)
    idx = numpy.argsort(kk)[::-1]
    kk = kk[idx]
    Gv = Gv[idx]
    kws = kws[idx]
    gxyz = gxyz[idx]
    coulG = .5/numpy.pi**2 * kws / kk

    aoaux = ft_ao.ft_ao(auxmol, Gv, None, Gvbase, gxyz, nxyz)
    kLR = numpy.asarray(aoaux.real, order='C')
    kLI = numpy.asarray(aoaux.imag, order='C')
    j2c = auxmol.intor('cint2c2e_sph', hermi=1).T  # .T to C-ordr
    lib.dot(kLR.T*coulG, kLR, -1, j2c, 1)
    lib.dot(kLI.T*coulG, kLI, -1, j2c, 1)

    kLR *= coulG.reshape(-1,1)
    kLI *= coulG.reshape(-1,1)
    aoaux = coulG = kk = kws = idx = None

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    buflen = min(max(int(max_memory*.3*1e6/8/naux), 1), nao_pair)
    shranges = outcore._guess_shell_ranges(mol, buflen, 's2ij')
    buflen = max([x[2] for x in shranges])
    blksize = max(16, int(max_memory*.15*1e6/16/buflen))
    pqkbuf = numpy.empty(buflen*blksize)
    bufs1 = numpy.empty((buflen*naux))
    # bufs2 holds either Lpq and ft_aopair
    bufs2 = numpy.empty(max(buflen*(naux+1),buflen*blksize*2)) # *2 for cmplx

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                  istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1+ncol
        shls_slice = (bstart, bend, 0, bend, mol.nbas, mol.nbas+auxmol.nbas)

        Lpq = get_Lpq(shls_slice, col0, col1, bufs2)
        save('Lpq', Lpq, col0, col1)

        j3c = _ri.nr_auxe2('cint3c2e_sph', atm, bas, env, shls_slice, ao_loc,
                           's2ij', 1, cintopt, bufs1)
        j3c = j3c.T  # -> (L|pq) in C-order
        lib.dot(j2c, Lpq, -.5, j3c, 1)
        Lpq = None

        for p0, p1 in lib.prange(0, Gv.shape[0], blksize):
            aoao = ft_ao.ft_aopair(mol, Gv[p0:p1], shls_slice[:4], 's2',
                                   Gvbase, gxyz[p0:p1], nxyz, buf=bufs2)
            nG = p1 - p0
            pqkR = numpy.ndarray((ncol,nG), buffer=pqkbuf)
            pqkR[:] = aoao.real.T
            lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3c, 1)
            pqkI = numpy.ndarray((ncol,nG), buffer=pqkbuf)
            pqkI[:] = aoao.imag.T
            lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3c, 1)
            aoao = aoaux = None
        save('j3c', j3c, col0, col1)

    feri.close()
Ejemplo n.º 10
0
Archivo: df.py Proyecto: eronca/pyscf
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)

                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True, overwrite_b=True)
                feri['j3c/%d'%ji][:naux,col0:col1] = v