Beispiel #1
0
def modified_cholesky_direct(M, tol=1e-5, verbose=False, cmax=20):
    """Modified cholesky decomposition of matrix.

    See, e.g. [Motta17]_

    Parameters
    ----------
    M : :class:`numpy.ndarray`
        Positive semi-definite, symmetric matrix.
    tol : float
        Accuracy desired. Optional. Default : False.
    verbose : bool
        If true print out convergence progress. Optional. Default : False.
    cmax : int
        Number of cholesky vectors to store N_gamma = cmax M.

    Returns
    -------
    chol_vecs : :class:`numpy.ndarray`
        Matrix of cholesky vectors.
    """
    # matrix of residuals.
    delta = numpy.copy(M.diagonal())
    nchol_max = int(cmax * M.shape[0]**0.5)
    # index of largest diagonal element of residual matrix.
    nu = numpy.argmax(numpy.abs(delta))
    delta_max = delta[nu]
    if verbose:
        print("# max number of cholesky vectors = %d" % nchol_max)
        header = ['iteration', 'max_residual', 'time']
        print(format_fixed_width_strings(header))
        init = [delta_max]
        print('{:17d} '.format(0) + format_fixed_width_floats(init))
        # print ("# iteration %d: delta_max = %f"%(0, delta_max.real))
    # Store for current approximation to input matrix.
    Mapprox = numpy.zeros(M.shape[0], dtype=M.dtype)
    chol_vecs = numpy.zeros((nchol_max, M.shape[0]), dtype=M.dtype)
    nchol = 0
    chol_vecs[0] = numpy.copy(M[:, nu]) / delta_max**0.5
    while abs(delta_max) > tol:
        # Update cholesky vector
        start = time.time()
        Mapprox += chol_vecs[nchol] * chol_vecs[nchol].conj()
        delta = M.diagonal() - Mapprox
        nu = numpy.argmax(numpy.abs(delta))
        delta_max = numpy.abs(delta[nu])
        nchol += 1
        Munu0 = numpy.dot(chol_vecs[:nchol, nu].conj(), chol_vecs[:nchol, :])
        chol_vecs[nchol] = (M[:, nu] - Munu0) / (delta_max)**0.5
        if verbose:
            step_time = time.time() - start
            out = [delta_max, step_time]
            print('{:17d} '.format(nchol) + format_fixed_width_floats(out))

    return numpy.array(chol_vecs[:nchol])
Beispiel #2
0
def chunked_cholesky(mol, max_error=1e-6, verbose=False, cmax=10):
    """Modified cholesky decomposition from pyscf eris.

    See, e.g. [Motta17]_

    Only works for molecular systems.

    Parameters
    ----------
    mol : :class:`pyscf.mol`
        pyscf mol object.
    orthoAO: :class:`numpy.ndarray`
        Orthogonalising matrix for AOs. (e.g., mo_coeff).
    delta : float
        Accuracy desired.
    verbose : bool
        If true print out convergence progress.
    cmax : int
        nchol = cmax * M, where M is the number of basis functions.
        Controls buffer size for cholesky vectors.

    Returns
    -------
    chol_vecs : :class:`numpy.ndarray`
        Matrix of cholesky vectors in AO basis.
    """
    nao = mol.nao_nr()
    diag = numpy.zeros(nao*nao)
    nchol_max = cmax * nao
    # This shape is more convenient for pauxy.
    chol_vecs = numpy.zeros((nchol_max, nao*nao))
    # eri = numpy.zeros((nao,nao,nao,nao))
    ndiag = 0
    dims = [0]
    nao_per_i = 0
    for i in range(0,mol.nbas):
        l = mol.bas_angular(i)
        nc = mol.bas_nctr(i)
        nao_per_i += (2*l+1)*nc
        dims.append(nao_per_i)
    start = time.time()
    for i in range(0,mol.nbas):
        shls = (i,i+1,0,mol.nbas,i,i+1,0,mol.nbas)
        buf = mol.intor('int2e_sph', shls_slice=shls)
        di, dk, dj, dl = buf.shape
        diag[ndiag:ndiag+di*nao] = buf.reshape(di*nao,di*nao).diagonal()
        ndiag += di * nao
    nu = numpy.argmax(diag)
    delta_max = diag[nu]
    if verbose:
        print(" # Generating Cholesky decomposition of ERIs."%nchol_max)
        print(" # max number of cholesky vectors = %d"%nchol_max)
        header = ['iteration', 'max_residual', 'time']
        print(format_fixed_width_strings(header))
        init = [delta_max, time.time()-start]
        print('{:17d} '.format(0)+format_fixed_width_floats(init))
    j = nu // nao
    l = nu % nao
    sj = numpy.searchsorted(dims, j)
    sl = numpy.searchsorted(dims, l)
    if dims[sj] != j and j != 0:
        sj -= 1
    if dims[sl] != l and l != 0:
        sl -= 1
    Mapprox = numpy.zeros(nao*nao)
    # ERI[:,jl]
    eri_col = mol.intor('int2e_sph',
                         shls_slice=(0,mol.nbas,0,mol.nbas,sj,sj+1,sl,sl+1))
    cj, cl = max(j-dims[sj],0), max(l-dims[sl],0)
    chol_vecs[0] = numpy.copy(eri_col[:,:,cj,cl].reshape(nao*nao)) / delta_max**0.5

    nchol = 0
    while abs(delta_max) > max_error:
        # Update cholesky vector
        start = time.time()
        # M'_ii = \sum_x L_i^x L_i^x
        Mapprox += chol_vecs[nchol] * chol_vecs[nchol]
        # D_ii = M_ii - M'_ii
        delta = diag - Mapprox
        nu = numpy.argmax(numpy.abs(delta))
        delta_max = numpy.abs(delta[nu])
        # Compute ERI chunk.
        # shls_slice computes shells of integrals as determined by the angular
        # momentum of the basis function and the number of contraction
        # coefficients. Need to search for AO index within this shell indexing
        # scheme.
        # AO index.
        j = nu // nao
        l = nu % nao
        # Associated shell index.
        sj = numpy.searchsorted(dims, j)
        sl = numpy.searchsorted(dims, l)
        if dims[sj] != j and j != 0:
            sj -= 1
        if dims[sl] != l and l != 0:
            sl -= 1
        # Compute ERI chunk.
        eri_col = mol.intor('int2e_sph',
                            shls_slice=(0,mol.nbas,0,mol.nbas,sj,sj+1,sl,sl+1))
        # Select correct ERI chunk from shell.
        cj, cl = max(j-dims[sj],0), max(l-dims[sl],0)
        Munu0 = eri_col[:,:,cj,cl].reshape(nao*nao)
        # Updated residual = \sum_x L_i^x L_nu^x
        R = numpy.dot(chol_vecs[:nchol+1,nu], chol_vecs[:nchol+1,:])
        chol_vecs[nchol+1] = (Munu0 - R) / (delta_max)**0.5
        nchol += 1
        if verbose:
            step_time = time.time() - start

            out = [delta_max, step_time]
            print('{:17d} '.format(nchol)+format_fixed_width_floats(out))

    return chol_vecs[:nchol]
Beispiel #3
0
    def run(self, comm, Xaoik, Xaolj, part, kpts, nmo_pk, nmo_max, Qi, gmap):
        # Setup residual matrix.
        ngs = Xaoik.shape[1]
        nkpts = len(kpts)
        t0 = time.clock()
        residual, k1max, k2max, i1max, i2max, maxv = (self.generate_diagonal(
            Xaoik, Xaolj, part, nmo_pk))
        t1 = time.clock()
        if part.rank == 0 and self.verbose:
            print(" # Time to generate diagonal (initial residual):"
                  " {:.2e}".format(t1 - t0))
            sys.stdout.flush()
        comm.Allgather(
            numpy.array([k1max, k2max, i1max, i2max, maxv],
                        dtype=numpy.float64), self.maxres_buff)
        vmax = 0
        k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)
        try:
            done = numpy.zeros((nkpts, nkpts, nmo_max, nmo_max), numpy.int32)
            maxresidual = numpy.zeros(part.maxvecs, dtype=numpy.float64)
            cholvecs = numpy.zeros((part.nkk, part.nij, part.maxvecs),
                                   dtype=numpy.complex128)
            Xkl = numpy.zeros(ngs, dtype=numpy.complex128)
            Xkl0 = numpy.zeros(ngs + part.maxvecs, dtype=numpy.complex128)
            Vbuff = numpy.zeros(part.maxvecs, dtype=numpy.complex128)
        except MemoryError:
            print(" # Problems allocating memory for auxiliary structures for "
                  "Cholesky solver.")
        done[k3, k4, i3, i4] = 1

        tstart = time.clock()
        if comm.rank == 0:
            sys.stdout.flush()

        more = True  # for parallel
        numv = 0
        if comm.rank == 0:
            header = [
                "iteration", "max_residual", "total_time", "time_k3k4",
                "time_comp_cholv", "time_buff"
            ]
            if self.verbose:
                print(format_fixed_width_strings(header))
        while more:
            t0 = time.clock()
            # stop condition
            if numv >= part.maxvecs:
                print(" Too many vectors needed to converge. Increase maximum "
                      "number of vectors.")
                break
            kkmax = k3 * nkpts + k4
            if comm.size <= nkpts:
                ipr = bisect(part.kkbounds[1:comm.size + 1], kkmax)
                if (kkmax >= part.kk0) & (kkmax < part.kkN):
                    assert (comm.rank == ipr)
            else:
                i34 = i3 * nmo_pk[k4] + i4
                for i in range(part.nproc_pk):
                    ij0_, ijN_ = fair_share(nmo_max * nmo_max, part.nproc_pk,
                                            i)
                    if i34 < ijN_:
                        ipr = k3 * part.nproc_pk + i
                        break

            # bcast Xaolj/CV[k3,k4,i34,0:numv]
            if comm.rank == ipr:
                assert (((i3 * nmo_pk[k4] + i4) >= part.ij0) &
                        ((i3 * nmo_pk[k4] + i4) < part.ijN))
                Xkl0[0:ngs] = Xaolj[kkmax - part.kk0, 0:ngs,
                                    i3 * nmo_pk[k4] + i4 - part.ij0]
                Xkl0[ngs:ngs + numv] = (cholvecs[kkmax - part.kk0,
                                                 i3 * nmo_pk[k4] + i4 -
                                                 part.ij0, 0:numv])
                Vbuff[0:numv] = (cholvecs[kkmax - part.kk0,
                                          i3 * nmo_pk[k4] + i4 - part.ij0,
                                          0:numv])
                comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
            else:
                comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
                Vbuff[0:numv] = Xkl0[ngs:ngs + numv]

            # add new Cholesky vector
            # 1. evaluate new column (ik|imax,kmax)

            t1 = time.clock()
            tadd = 0.0

            for k in range(part.nkk):
                k1 = part.n2k1[k]
                k2 = part.n2k2[k]
                if k3 == self.kconserv[k1, k2, k4]:
                    q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4]
                    if numpy.sum(abs(q1)) > 1e-9:
                        t_ = time.clock()
                        ip = -1
                        for ii in range(27):
                            if numpy.sum(
                                    numpy.linalg.norm(q1 - Qi[ii, :])) < 1e-12:
                                ip = ii
                                break
                        if ip < 0:
                            print(" # Could not find Q: {} {} ".format(q1, Qi))
                            sys.exit()
                        for ix in range(ngs):
                            Xkl[ix] = Xkl0[gmap[ip, ix]]
                        tadd += time.clock() - t_
                    else:
                        Xkl[0:ngs] = Xkl0[0:ngs]
                    n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0
                    cholvecs[k, 0:n_,
                             numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj())

            t2 = time.clock()

            # 2. subtract projection along previous components
            cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv],
                                              Vbuff[0:numv].conj())
            cholvecs[:, :, numv] /= math.sqrt(vmax)

            # update residual
            residual -= (cholvecs[:, :, numv] *
                         cholvecs[:, :, numv].conj()).real

            maxv = 0
            for k in range(part.nkk):
                k1 = part.n2k1[k]
                k2 = part.n2k2[k]
                for ij in range(part.nij):
                    if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]:
                        break
                    if abs(residual[k, ij]) > maxv:
                        maxv = abs(residual[k, ij])
                        k1max = k1
                        k2max = k2
                        i1max = (ij + part.ij0) // nmo_pk[k2]
                        i2max = (ij + part.ij0) % nmo_pk[k2]

            t3 = time.clock()

            # assemble full CV on head node
            comm.Allgather(
                numpy.array([k1max, k2max, i1max, i2max, maxv],
                            dtype=numpy.float64), self.maxres_buff)
            k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)

            t4 = time.clock()

            # only root keeps track of residual and I/O
            if part.rank == 0:

                maxresidual[numv] = abs(vmax)

                # print and evaluate stop condition
                output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0]
                if self.verbose:
                    print("{:17d} ".format(numv) +
                          format_fixed_width_floats(output))
                # print("{:8d}  {:13.8e}".format(numv, vmax))
                tstart = time.clock()

                if numv % 100 == 0:
                    sys.stdout.flush()

            numv += 1
            if vmax < self.gtol_chol:
                more = False

            if (done[k3, k4, i3, i4] > 0) and more:
                print("Error in Cholesky decomposition. "
                      "done[imax,kmax]>0.", k3, k4, i3, i4, vmax)
                sys.exit()
            done[k3, k4, i3, i4] = 1

            t6 = time.clock()
        return cholvecs[:, :, :numv]
Beispiel #4
0
    def run(self, comm, X, h5file):
        # Unpack for convenience.
        ngs = self.ngs
        nmo_max = self.nmo_max
        nkpts = self.nkpts
        part = self.part
        nmo_pk = self.nmo_pk
        QKToK2 = self.QKToK2
        kpts = self.kpts
        # Setup residual matrix.
        mem = 2 * 16 * nkpts * ngs * nmo_max**2 / 1024**3
        if self.verbose and comm.rank == 0:
            print(" # Approx total memory required for orbital products: "
                  "{:.2e} GB.".format(mem))
        mem_verbose = comm.rank == 0 and self.verbose > 1
        if mem_verbose:
            print(" # Approx memory per MPI task for auxiliary structures.")
        Xaoik = alloc_helper((part.nkk, ngs, part.nij),
                             dtype=numpy.complex128,
                             name='Xaoik',
                             verbose=mem_verbose)
        Xaolj = alloc_helper((part.nkk, ngs, part.nij),
                             dtype=numpy.complex128,
                             name='Xaolj',
                             verbose=mem_verbose)
        done = alloc_helper((nkpts, nmo_max, nmo_max),
                            numpy.int32,
                            name='done',
                            verbose=mem_verbose)
        maxresidual = alloc_helper((part.maxvecs, ),
                                   dtype=numpy.float64,
                                   name='maxresidual',
                                   verbose=mem_verbose)
        cholvecs = alloc_helper((part.nkk, part.nij, part.maxvecs),
                                dtype=numpy.complex128,
                                name='cholvecs',
                                verbose=mem_verbose)
        Xkl = alloc_helper((ngs, ),
                           dtype=numpy.complex128,
                           name='xkl',
                           verbose=mem_verbose)
        Xkl0 = alloc_helper((ngs + part.maxvecs, ),
                            dtype=numpy.complex128,
                            name='xkl0',
                            verbose=mem_verbose)
        Vbuff = alloc_helper((part.maxvecs, ),
                             dtype=numpy.complex128,
                             name='Vbuff',
                             verbose=mem_verbose)
        num_cholvecs = alloc_helper((nkpts, ),
                                    dtype=numpy.int32,
                                    name='num_cholvecs',
                                    verbose=mem_verbose)
        header = [
            "iteration", "max_residual", "total_time", "time_k3k4",
            "time_comp_cholv", "time_buff"
        ]

        for Q in range(nkpts):
            t0 = time.clock()
            if Q > self.kminus[Q]:
                continue
            if comm.rank == 0 and self.verbose:
                print(
                    " # Calculating factorization for momentum: {}".format(Q))
                print(" # Generating orbital products")
                sys.stdout.flush()

            t1 = time.clock()

            maxresidual[:] = 0
            done[:, :, :] = 0
            cholvecs[:, :, :] = 0
            start = time.time()
            self.generate_orbital_products(Q, X, Xaoik, Xaolj)
            if self.verbose and comm.rank == 0:
                print(" # Time to generate orbital products: "
                      "{:13.8e}".format(time.time() - start))
            residual, k1max, k2max, i1max, i2max, maxv = (
                self.generate_diagonal(Q, Xaoik, Xaolj))

            buff = numpy.array([k1max, k2max, i1max, i2max, maxv],
                               dtype=numpy.float64)
            comm.Allgather(buff, self.maxres_buff)
            k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)
            done[k3, i3, i4] = 1

            tstart = time.clock()
            if comm.rank == 0:
                sys.stdout.flush()

            cnt = 0
            vmaxold = vmax
            more = True  # for parallel
            numv = 0
            while more:

                t0 = time.clock()
                # stop condition
                if comm.rank == 0 and self.verbose:
                    if numv == 0:
                        print(format_fixed_width_strings(header))
                if numv >= self.maxvecs:
                    print(" Too many vectors needed to converge. "
                          "Increase maximum number of vectors.")
                    break

                if comm.size <= nkpts:
                    ipr = bisect(part.kkbounds[1:comm.size + 1], k3)
                    if (k3 >= part.kk0) and (k3 < part.kkN):
                        assert (comm.rank == ipr)
                else:
                    i34 = i3 * nmo_pk[k4] + i4
                    for i in range(part.nproc_pk):
                        ij0_, ijN_ = fair_share(nmo_max * nmo_max,
                                                part.nproc_pk, i)
                        if i34 < ijN_:
                            ipr = k3 * part.nproc_pk + i
                            break

                if comm.rank == ipr:
                    assert ((i3 * nmo_pk[k4] + i4 >= part.ij0))
                    assert ((i3 * nmo_pk[k4] + i4 < part.ijN))
                    Xkl0[0:ngs] = Xaolj[k3 - part.kk0, 0:ngs,
                                        i3 * nmo_pk[k4] + i4 - part.ij0]
                    Xkl0[ngs:ngs + numv] = cholvecs[k3 - part.kk0,
                                                    i3 * nmo_pk[k4] + i4 -
                                                    part.ij0, 0:numv]
                    Vbuff[0:numv] = cholvecs[k3 - part.kk0,
                                             i3 * nmo_pk[k4] + i4 - part.ij0,
                                             0:numv]
                    comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
                else:
                    comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
                    Vbuff[0:numv] = Xkl0[ngs:ngs + numv]

                # add new Cholesky vector
                # 1. evaluate new column (ik|imax,kmax)

                t1 = time.clock()
                tadd = 0.0
                for k in range(part.nkk):
                    k1 = k + part.kk0
                    k2 = QKToK2[Q][k1]
                    if part.ij0 > nmo_pk[k1] * nmo_pk[k2]:
                        continue
                    if numpy.sum(abs(kpts[k2] - kpts[k1] + kpts[k3] -
                                     kpts[k4])) > 1e-9:
                        t_ = time.clock()
                        q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4]
                        ip = -1
                        for ii in range(27):
                            if numpy.sum(
                                    numpy.linalg.norm(q1 -
                                                      self.Qi[ii, :])) < 1e-12:
                                ip = ii
                                break
                        if ip < 0:
                            print("Could not find Q: {} {}".format(
                                q1, self.Qi))
                            sys.exit()
                        for ix in range(ngs):
                            Xkl[ix] = Xkl0[self.gmap[ip, ix]]
                        tadd += time.clock() - t_
                    else:
                        Xkl[0:ngs] = Xkl0[0:ngs]
                    n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0
                    cholvecs[k, 0:n_,
                             numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj())
                t2 = time.clock()

                # 2. substract projection along previous components
                cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv],
                                                  Vbuff[0:numv].conj())
                cholvecs[:, :, numv] /= math.sqrt(vmax)

                # update residual
                residual -= (cholvecs[:, :, numv] *
                             cholvecs[:, :, numv].conj()).real

                maxv = 0
                k1max = -1
                k2max = -1
                i1max = -1
                i2max = -1
                for k in range(part.nkk):
                    k1 = k + part.kk0
                    k2 = self.QKToK2[Q][k1]
                    # if ij0 > nmo_pk[k1]*nmo_pk[k2]:
                    # continue
                    for ij in range(part.nij):
                        if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]:
                            break
                        if abs(residual[k, ij]) > maxv:
                            maxv = abs(residual[k, ij])
                            k1max = k1
                            k2max = k2
                            i1max = (ij + part.ij0) // nmo_pk[k2]
                            i2max = (ij + part.ij0) % nmo_pk[k2]

                t3 = time.clock()

                # assemble full CV on head node
                buff = numpy.array([k1max, k2max, i1max, i2max, maxv],
                                   dtype=numpy.float64)
                comm.Allgather(buff, self.maxres_buff)
                k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)

                t4 = time.clock()

                # only root keeps track of residual and I/O
                if comm.rank == 0:

                    maxresidual[numv] = abs(vmax)
                    # print and evaluate stop condition
                    output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0]
                    if self.verbose:
                        print("{:17d} ".format(numv) +
                              format_fixed_width_floats(output))
                    tstart = time.clock()

                    if numv % 100 == 0:
                        sys.stdout.flush()

                numv += 1
                if vmax < self.gtol_chol:
                    more = False

                if done[k3, i3, i4] > 0 and more:
                    # TODO: What is this?
                    print(
                        "Error in Cholesky decomposition. "
                        "done[imax,kmax]>0.", k3, k4, i3, i4, vmax)
                    sys.exit()
                done[k3, i3, i4] = 1

                t6 = time.clock()

            comm.barrier()
            num_cholvecs[Q] = numv

            if h5file.phdf or comm.rank == 0:
                LQ = h5file.grp_v2.create_dataset(
                    "L" + str(Q), (nkpts, nmo_max * nmo_max * numv, 2),
                    dtype=numpy.float64)
                # cholvecs[nkk,nij,maxvecs]
                for kk in range(part.nkk):
                    T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy())
                    T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0)
                    LQ[kk + part.kk0, part.ij0 * numv:part.ijN * numv, :] = T_
                    T_ = None
            else:
                Ldim = h5file.grp_v2.create_dataset(
                    "Ldim" + str(Q),
                    data=numpy.array([
                        part.nkk, part.nij, part.kk0, part.ij0, part.ijN, numv
                    ],
                                     dtype=numpy.int32))
                LQ = h5file.grp_v2.create_dataset(
                    "L" + str(Q), (part.nkk, part.nij * numv, 2),
                    dtype=numpy.float64)
                # cholvecs[nkk,nij,maxvecs]
                for kk in range(part.nkk):
                    T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy())
                    T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0)
                    LQ[kk, :, :] = T_
                    T_ = None
            comm.barrier()

        h5file.grp.create_dataset("NCholPerKP", data=num_cholvecs)
        comm.barrier()