コード例 #1
0
ファイル: generator2.py プロジェクト: eojons/gpaw-scme
    def check(self, l):
        """Solve the angular-momentum-``l`` eigenproblem in a Gaussian basis.

        The basis reuses the exponents (``alpha_B``) and ``eps`` of the basis
        attached to the first all-electron channel.  The matrix returned by
        ``calculate_potential_matrix(self.vtr_g)`` plus ``T_bb`` is
        diagonalized against an overlap that starts out as the identity; if
        ``self.waves_l[l]`` is non-empty, both matrices receive the
        ``P dH P^T`` / ``P dS P^T`` corrections.

        Returns ``(e_b, n0)``: the eigenvalue array handed to
        ``general_diagonalize`` and the integer offset ``n0``.
        """
        reference = self.aea.channels[0].basis
        tolerance, exponents = reference.eps, reference.alpha_B

        basis = GaussianBasis(l, exponents, self.rgd, tolerance)
        hamiltonian = basis.calculate_potential_matrix(self.vtr_g)
        hamiltonian += basis.T_bb
        overlap = np.eye(len(basis))

        def occupied_count():
            # Number of entries of all-electron channel l with f_n > 0.
            return (self.aea.channels[l].f_n > 0).sum()

        n0 = 0
        if l >= len(self.waves_l):
            # No partial-wave entry for this l; fall back to the
            # all-electron channel when there is one.
            if l < len(self.aea.channels):
                n0 = occupied_count()
        else:
            waves = self.waves_l[l]
            if len(waves) > 0:
                projections = self.rgd.integrate(
                    basis.basis_bg[:, None] * waves.pt_ng) / (4 * pi)
                transposed = projections.T
                hamiltonian += np.dot(np.dot(projections, waves.dH_nn),
                                      transposed)
                overlap += np.dot(np.dot(projections, waves.dS_nn),
                                  transposed)
                n0 = waves.n_n[0] - l - 1
                if n0 < 0 and l < len(self.aea.channels):
                    n0 = occupied_count()

        eigenvalues = np.empty(len(basis))
        general_diagonalize(hamiltonian, eigenvalues, overlap)
        return eigenvalues, n0
コード例 #2
0
    def check(self, l):
        """Solve the angular-momentum-``l`` eigenproblem in a Gaussian basis.

        The basis reuses the exponents (``alpha_B``) and ``eps`` of the basis
        attached to the first all-electron channel.  The matrix returned by
        ``calculate_potential_matrix(self.vtr_g)`` plus ``T_bb`` is
        diagonalized against an overlap that starts out as the identity; if
        ``self.waves_l[l]`` is non-empty, both matrices receive the
        ``P dH P^T`` / ``P dS P^T`` corrections.

        Returns ``(e_b, n0)``: the eigenvalue array handed to
        ``general_diagonalize`` and the integer offset ``n0``.
        """
        reference = self.aea.channels[0].basis
        tolerance, exponents = reference.eps, reference.alpha_B

        basis = GaussianBasis(l, exponents, self.rgd, tolerance)
        hamiltonian = basis.calculate_potential_matrix(self.vtr_g)
        hamiltonian += basis.T_bb
        overlap = np.eye(len(basis))

        def occupied_count():
            # Number of entries of all-electron channel l with f_n > 0.
            return (self.aea.channels[l].f_n > 0).sum()

        n0 = 0
        if l >= len(self.waves_l):
            # No partial-wave entry for this l; fall back to the
            # all-electron channel when there is one.
            if l < len(self.aea.channels):
                n0 = occupied_count()
        else:
            waves = self.waves_l[l]
            if len(waves) > 0:
                projections = self.rgd.integrate(
                    basis.basis_bg[:, None] * waves.pt_ng) / (4 * pi)
                transposed = projections.T
                hamiltonian += np.dot(np.dot(projections, waves.dH_nn),
                                      transposed)
                overlap += np.dot(np.dot(projections, waves.dS_nn),
                                  transposed)
                n0 = waves.n_n[0] - l - 1
                if n0 < 0 and l < len(self.aea.channels):
                    n0 = occupied_count()

        eigenvalues = np.empty(len(basis))
        general_diagonalize(hamiltonian, eigenvalues, overlap)
        return eigenvalues, n0
コード例 #3
0
    def diagonalize(self, h):
        """Diagonalize the radial PAW problem on a uniform grid (ghost check).

        For l = 0..3 a generalized eigenproblem is set up on ``ng = 350``
        equidistant points ``R = h, 2h, ..., ng * h`` and solved with
        ``general_diagonalize``.  The first eigenvalue ``e_n[0]`` is reported
        through ``self.text``; when it disagrees with the all-electron
        reference (or lies below ``self.emax`` when there is no reference)
        '  GHOST-STATE!' is printed and ``self.ghost`` is set to True.

        h: spacing of the uniform grid.
        """
        ng = 350
        t = self.text
        t()
        t('Diagonalizing with gridspacing h=%.3f' % h)
        R = h * np.arange(1, ng + 1)
        # Index into self.r of the middle point of a 3-point stencil for each
        # R; presumably the nearest grid point, assuming
        # r = beta * g / (N - g) -- TODO confirm.
        G = (self.N * R / (self.beta + R) + 0.5).astype(int)
        # Keep G - 1 and G + 1 within 0 .. N - 1.
        G = np.clip(G, 1, self.N - 2)
        R1 = np.take(self.r, G - 1)
        R2 = np.take(self.r, G)
        R3 = np.take(self.r, G + 1)
        # Quadratic Lagrange interpolation weights for the nodes R1, R2, R3
        # evaluated at R.
        x1 = (R - R2) * (R - R3) / (R1 - R2) / (R1 - R3)
        x2 = (R - R1) * (R - R3) / (R2 - R1) / (R2 - R3)
        x3 = (R - R1) * (R - R2) / (R3 - R1) / (R3 - R2)

        def interpolate(f):
            # Evaluate an array given on self.r at the uniform points R.
            f1 = np.take(f, G - 1)
            f2 = np.take(f, G)
            f3 = np.take(f, G + 1)
            return f1 * x1 + f2 * x2 + f3 * x3

        vt = interpolate(self.vt)
        t()
        t('state   all-electron     PAW')
        t('-------------------------------')
        for l in range(4):
            if l <= self.lmax:
                # Non-local part: q^T dH q and q^T dO q scaled by h.
                q_n = np.array([interpolate(q) for q in self.q_ln[l]])
                H = np.dot(np.transpose(q_n), np.dot(self.dH_lnn[l], q_n)) * h
                S = np.dot(np.transpose(q_n), np.dot(self.dO_lnn[l], q_n)) * h
            else:
                H = np.zeros((ng, ng))
                S = np.zeros((ng, ng))
            # Stride ng + 1 through the flattened matrix walks a diagonal:
            # main diagonal gets vt + 1/h**2 + l(l+1)/(2 R**2) ...
            H.ravel()[::ng + 1] += vt + 1.0 / h**2 + l * (l + 1) / 2.0 / R**2
            # ... and the first super- and sub-diagonal get -0.5/h**2
            # (three-point finite-difference kinetic term).
            H.ravel()[1::ng + 1] -= 0.5 / h**2
            H.ravel()[ng::ng + 1] -= 0.5 / h**2
            S.ravel()[::ng + 1] += 1.0
            e_n = np.zeros(ng)
            general_diagonalize(H, e_n, S)
            # Presumably the lowest eigenvalue (assumes ascending order from
            # general_diagonalize -- TODO confirm).
            ePAW = e_n[0]
            if l <= self.lmax and self.n_ln[l][0] > 0:
                # Compare against the first all-electron eigenvalue for l.
                eAE = self.e_ln[l][0]
                # end='' keeps the line open for the ghost marker below.
                t('%d%s:   %12.6f %12.6f' %
                  (self.n_ln[l][0], 'spdf'[l], eAE, ePAW),
                  end='')
                if abs(eAE - ePAW) > 0.014:
                    t('  GHOST-STATE!')
                    self.ghost = True
                else:
                    t()
            else:
                # No reference state: flag anything below self.emax.
                t('*%s:                %12.6f' % ('spdf'[l], ePAW), end='')
                if ePAW < self.emax:
                    t('  GHOST-STATE!')
                    self.ghost = True
                else:
                    t()
        t('-------------------------------')
コード例 #4
0
ファイル: generator.py プロジェクト: eojons/gpaw-scme
 def diagonalize(self, h):
     """Diagonalize the radial PAW problem on a uniform grid (ghost check).

     For l = 0..3 a generalized eigenproblem is set up on ``ng = 350``
     equidistant points ``R = h, 2h, ..., ng * h`` and solved with
     ``general_diagonalize``.  The first eigenvalue ``e_n[0]`` is reported
     through ``self.text``; when it disagrees with the all-electron
     reference (or lies below ``self.emax`` when there is no reference)
     '  GHOST-STATE!' is printed and ``self.ghost`` is set to True.

     h: spacing of the uniform grid.
     """
     ng = 350
     t = self.text
     t()
     t('Diagonalizing with gridspacing h=%.3f' % h)
     R = h * np.arange(1, ng + 1)
     # Index into self.r of the middle point of a 3-point stencil for each
     # R; presumably the nearest grid point, assuming
     # r = beta * g / (N - g) -- TODO confirm.
     G = (self.N * R / (self.beta + R) + 0.5).astype(int)
     # Keep G - 1 and G + 1 within 0 .. N - 1.
     G = np.clip(G, 1, self.N - 2)
     R1 = np.take(self.r, G - 1)
     R2 = np.take(self.r, G)
     R3 = np.take(self.r, G + 1)
     # Quadratic Lagrange interpolation weights for the nodes R1, R2, R3
     # evaluated at R.
     x1 = (R - R2) * (R - R3) / (R1 - R2) / (R1 - R3)
     x2 = (R - R1) * (R - R3) / (R2 - R1) / (R2 - R3)
     x3 = (R - R1) * (R - R2) / (R3 - R1) / (R3 - R2)
     def interpolate(f):
         # Evaluate an array given on self.r at the uniform points R.
         f1 = np.take(f, G - 1)
         f2 = np.take(f, G)
         f3 = np.take(f, G + 1)
         return f1 * x1 + f2 * x2 + f3 * x3
     vt = interpolate(self.vt)
     t()
     t('state   all-electron     PAW')
     t('-------------------------------')
     for l in range(4):
         if l <= self.lmax:
             # Non-local part: q^T dH q and q^T dO q scaled by h.
             q_n = np.array([interpolate(q) for q in self.q_ln[l]])
             H = np.dot(np.transpose(q_n),
                        np.dot(self.dH_lnn[l], q_n)) * h
             S = np.dot(np.transpose(q_n),
                        np.dot(self.dO_lnn[l], q_n)) * h
         else:
             H = np.zeros((ng, ng))
             S = np.zeros((ng, ng))
         # Stride ng + 1 through the flattened matrix walks a diagonal:
         # main diagonal gets vt + 1/h**2 + l(l+1)/(2 R**2) ...
         H.ravel()[::ng + 1] += vt + 1.0 / h**2 + l * (l + 1) / 2.0 / R**2
         # ... and the first super- and sub-diagonal get -0.5/h**2
         # (three-point finite-difference kinetic term).
         H.ravel()[1::ng + 1] -= 0.5 / h**2
         H.ravel()[ng::ng + 1] -= 0.5 / h**2
         S.ravel()[::ng + 1] += 1.0
         e_n = np.zeros(ng)
         general_diagonalize(H, e_n, S)
         # Presumably the lowest eigenvalue (assumes ascending order from
         # general_diagonalize -- TODO confirm).
         ePAW = e_n[0]
         if l <= self.lmax and self.n_ln[l][0] > 0:
             # Compare against the first all-electron eigenvalue for l.
             eAE = self.e_ln[l][0]
             # end='' keeps the line open for the ghost marker below.
             t('%d%s:   %12.6f %12.6f' % (self.n_ln[l][0],
                                          'spdf'[l], eAE, ePAW), end='')
             if abs(eAE - ePAW) > 0.014:
                 t('  GHOST-STATE!')
                 self.ghost = True
             else:
                 t()
         else:
             # No reference state: flag anything below self.emax.
             t('*%s:                %12.6f' % ('spdf'[l], ePAW), end='')
             if ePAW < self.emax:
                 t('  GHOST-STATE!')
                 self.ghost = True
             else:
                 t()
     t('-------------------------------')
コード例 #5
0
    def iterate(self, hamiltonian, wfs):
        """Solve the radial eigenproblems and fill in the k-point data.

        For every spin ``s`` and every ``l`` up to ``len(self.f_sln[0]) - 1``
        the matrix ``T_l[l] + diag(vt_sg[s])`` plus the projector terms is
        passed to ``general_diagonalize`` together with a copy of
        ``self.S_l[l]``.  Eigenvalues and rows of ``H`` are then written to
        ``eps_n``, ``psit_nG`` and ``P_ani[0]`` of ``wfs.kpt_u[s]``, each
        radial state occupying ``2 * l + 1`` consecutive band indices.

        Only the first entry of ``wfs.setups``, ``hamiltonian.dH_asp`` and
        ``kpt.P_ani`` is used.
        """
        if not self.initialized:
            self.initialize(wfs)

        r = self.gd.r_g
        # Used below as the integration weight; presumably the radial grid
        # is uniform and starts at h -- TODO confirm.
        h = r[0]
        N = len(r)
        lmax = len(self.f_sln[0]) - 1
        setup = wfs.setups[0]

        e_n = np.zeros(N)

        for s in range(wfs.nspins):
            dH_ii = unpack(hamiltonian.dH_asp[0][s])
            kpt = wfs.kpt_u[s]
            # N1 is the first band index of the next state to be stored.
            N1 = 0
            for l in range(lmax + 1):
                H = self.T_l[l] + np.diag(hamiltonian.vt_sg[s])
                # i1 / i2 are the offsets of projector pt1 / pt2 in the
                # index space of dH_ii; each projector takes 2*l+1 entries.
                i1 = 0
                for pt1, l1 in zip(self.pt_j, setup.l_j):
                    i2 = 0
                    for pt2, l2 in zip(self.pt_j, setup.l_j):
                        if l1 == l2 == l:
                            H += (h * dH_ii[i1, i2] *
                                  np.outer(pt1 * r, pt2 * r))
                        i2 += 2 * l2 + 1
                    i1 += 2 * l1 + 1
                # S_l[l] is copied so the stored matrix is not modified.
                # H[n] is read below as state n; presumably
                # general_diagonalize overwrites H in place -- TODO confirm.
                general_diagonalize(H, e_n, self.S_l[l].copy())

                for n in range(len(self.f_sln[s][l])):
                    N2 = N1 + 2 * l + 1
                    # The same radial solution is stored for all 2*l+1 bands.
                    kpt.eps_n[N1:N2] = e_n[n]
                    kpt.psit_nG[N1:N2] = H[n] / r / sqrt(h)
                    i1 = 0
                    for pt, ll in zip(self.pt_j, setup.l_j):
                        i2 = i1 + 2 * ll + 1
                        if ll == l:
                            # Sum of psit * pt * r**2 weighted by h, placed
                            # on the diagonal of the (2l+1) x (2l+1) block.
                            P = np.dot(kpt.psit_nG[N1], pt * r**2) * h
                            kpt.P_ani[0][N1:N2, i1:i2] = P * np.eye(2 * l + 1)
                        i1 = i2
                    N1 = N2
コード例 #6
0
    def iterate(self, hamiltonian, wfs):
        """Solve the radial eigenproblems and fill in the k-point data.

        For every spin ``s`` and every ``l`` up to ``len(self.f_sln[0]) - 1``
        the matrix ``T_l[l] + diag(vt_sg[s])`` plus the projector terms is
        passed to ``general_diagonalize`` together with a copy of
        ``self.S_l[l]``.  Eigenvalues and rows of ``H`` are then written to
        ``eps_n``, ``psit_nG`` and ``P_ani[0]`` of ``wfs.kpt_u[s]``, each
        radial state occupying ``2 * l + 1`` consecutive band indices.

        Only the first entry of ``wfs.setups``, ``hamiltonian.dH_asp`` and
        ``kpt.P_ani`` is used.
        """
        if not self.initialized:
            self.initialize(wfs)
        
        r = self.gd.r_g
        # Used below as the integration weight; presumably the radial grid
        # is uniform and starts at h -- TODO confirm.
        h = r[0]
        N = len(r)
        lmax = len(self.f_sln[0]) - 1
        setup = wfs.setups[0]

        e_n = np.zeros(N)

        for s in range(wfs.nspins):
            dH_ii = unpack(hamiltonian.dH_asp[0][s])
            kpt = wfs.kpt_u[s]
            # N1 is the first band index of the next state to be stored.
            N1 = 0
            for l in range(lmax + 1):
                H = self.T_l[l] + np.diag(hamiltonian.vt_sg[s])
                # i1 / i2 are the offsets of projector pt1 / pt2 in the
                # index space of dH_ii; each projector takes 2*l+1 entries.
                i1 = 0
                for pt1, l1 in zip(self.pt_j, setup.l_j):
                    i2 = 0
                    for pt2, l2 in zip(self.pt_j, setup.l_j):
                        if l1 == l2 == l:
                            H += (h * dH_ii[i1, i2] *
                                  np.outer(pt1 * r,  pt2 * r))
                        i2 += 2 * l2 + 1
                    i1 += 2 * l1 + 1
                # S_l[l] is copied so the stored matrix is not modified.
                # H[n] is read below as state n; presumably
                # general_diagonalize overwrites H in place -- TODO confirm.
                general_diagonalize(H, e_n, self.S_l[l].copy())

                for n in range(len(self.f_sln[s][l])):
                    N2 = N1 + 2 * l + 1
                    # The same radial solution is stored for all 2*l+1 bands.
                    kpt.eps_n[N1:N2] = e_n[n]
                    kpt.psit_nG[N1:N2] = H[n] / r / sqrt(h)
                    i1 = 0
                    for pt, ll in zip(self.pt_j, setup.l_j):
                        i2 = i1 + 2 * ll + 1
                        if ll == l:
                            # Sum of psit * pt * r**2 weighted by h, placed
                            # on the diagonal of the (2l+1) x (2l+1) block.
                            P = np.dot(kpt.psit_nG[N1], pt * r**2) * h
                            kpt.P_ani[0][N1:N2, i1:i2] = P * np.eye(2 * l + 1)
                        i1 = i2
                    N1 = N2
コード例 #7
0
    def iterate_one_k_point(self, ham, wfs, kpt, Vt_xdMM):
        """Diagonalize the doubled (2M x 2M) problem for one k-point.

        Four Hamiltonian matrices are computed, one per index ``x`` of
        ``Vt_xdMM``; ``kpt.s`` is set to ``x`` for each call and reset to
        ``None`` afterwards, and the kinetic part is only requested for
        ``x == 0``.  The block-diagonal matrix with blocks ``H[0] + H[3]``
        and ``H[0] - H[3]`` is diagonalized against a block-diagonal overlap
        holding two copies of ``wfs.S_qMM[kpt.q]``.

        Eigenvalues are written to ``kpt.eps_n``; ``kpt.C_nM`` becomes the
        Hamiltonian array as left by ``general_diagonalize``, reshaped to
        ``(4 * mynbands, M)``.

        NOTE(review): the matrices for x = 1 and x = 2 are computed but not
        used here -- confirm that this is intended.
        """
        assert wfs.gd.comm.size == 1, 'No quite sure this works!'
        if wfs.bd.comm.size > 1 and wfs.bd.strided:
            raise NotImplementedError

        matrices = []
        for component in range(4):
            kpt.s = component
            matrices.append(
                self.calculate_hamiltonian_matrix(
                    ham, wfs, kpt, Vt_xdMM[component],
                    root=0, add_kinetic=(component == 0)))
        kpt.s = None

        overlap_MM = wfs.S_qMM[kpt.q]
        M = len(overlap_MM)
        upper = slice(None, M)
        lower = slice(M, None)
        doubled_shape = (2 * M, 2 * M)

        S2_MM = np.zeros(doubled_shape, complex)
        H2_MM = np.zeros(doubled_shape, complex)

        for block in (upper, lower):
            S2_MM[block, block] = overlap_MM

        H2_MM[upper, upper] = matrices[0] + matrices[3]
        H2_MM[lower, lower] = matrices[0] - matrices[3]

        kpt.eps_n = np.empty(2 * wfs.bd.mynbands)

        timer_label = repr(self.diagonalizer)
        wfs.timer.start(timer_label)
        # The serial LAPACK wrapper is called directly here instead of
        # going through self.diagonalizer.diagonalize(...).
        from gpaw.utilities.lapack import general_diagonalize
        general_diagonalize(H2_MM, kpt.eps_n, S2_MM)
        kpt.C_nM = H2_MM
        wfs.timer.stop(timer_label)
        kpt.C_nM.shape = (wfs.bd.mynbands * 4, M)
コード例 #8
0
    def iterate_one_k_point(self, hamiltonian, wfs, kpt):
        """Do Davidson iterations for the kpoint.

        Starts from the output of ``self.subspace_diagonalize`` and runs
        ``self.niter`` iterations.  In each one the preconditioned residuals
        ``psit2_nG`` extend the current states to a space of dimension
        ``2 * nbands``; the diagonal and lower blocks of ``H_2n2n`` and
        ``S_2n2n`` are assembled, the generalized eigenproblem is solved on
        rank 0 of ``gd.comm`` and broadcast, and the states and projections
        are rotated with the result.

        Returns ``(error, psit_nG)``: ``error`` is the weighted sum of
        residual norms accumulated in the last iteration, summed over
        ``gd.comm``.

        NOTE(review): ``error`` is only assigned inside the loop, so
        ``niter == 0`` would fail at the final ``gd.comm.sum(error)``.
        """
        niter = self.niter
        nbands = self.nbands

        gd = wfs.matrixoperator.gd

        psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
        # Note that psit_nG is now in self.operator.work1_nG and
        # Htpsit_nG is in kpt.psit_nG!

        H_2n2n = self.H_2n2n
        S_2n2n = self.S_2n2n
        eps_2n = self.eps_2n
        # Storage for the correction vectors, taken from self.Htpsit_nG.
        psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)

        self.timer.start("Davidson")
        # R_nG aliases Htpsit_nG; calculate_residuals is handed it as the
        # last argument.
        R_nG = Htpsit_nG
        self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG, kpt.P_ani, kpt.eps_n, R_nG)

        def integrate(a_G, b_G):
            # Local (not yet summed over gd.comm) real part of <a|b>.
            return np.real(wfs.integrate(a_G, b_G, global_integral=False))

        for nit in range(niter):
            H_2n2n[:] = 0.0
            S_2n2n[:] = 0.0

            norm_n = np.zeros(nbands)
            error = 0.0
            for n in range(nbands):
                # Weight of band n in the error: kpt.weight when there are
                # no occupations, otherwise f_n[n] ...
                if kpt.f_n is None:
                    weight = kpt.weight
                else:
                    weight = kpt.f_n[n]
                # ... unless a fixed number of bands is to be converged:
                # then those bands get kpt.weight and the rest get zero.
                if self.nbands_converge != "occupied":
                    if n < self.nbands_converge:
                        weight = kpt.weight
                    else:
                        weight = 0.0
                error += weight * integrate(R_nG[n], R_nG[n])

                # Correction vector: preconditioned residual of band n.
                ekin = self.preconditioner.calculate_kinetic_energy(psit_nG[n : n + 1], kpt)
                psit2_nG[n] = self.preconditioner(R_nG[n : n + 1], kpt, ekin)

                if self.normalize:
                    norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

                # psi-psi block: eps_n on the diagonal of H, ones on the
                # diagonal of S.
                H_2n2n[n, n] = kpt.eps_n[n]
                S_2n2n[n, n] = 1.0

            if self.normalize:
                # Scale every correction vector by norm**-0.5 using the
                # norms summed over gd.comm.
                gd.comm.sum(norm_n)
                for norm, psit2_G in zip(norm_n, psit2_nG):
                    psit2_G *= norm ** -0.5

            # Calculate projections
            P2_ani = wfs.pt.dict(nbands)
            wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

            # Hamiltonian matrix
            # <psi2 | H | psi>
            # Htpsit_nG is overwritten with H applied to psit2_nG.
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit2_nG, Htpsit_nG)
            gd.integrate(psit_nG, Htpsit_nG, global_integral=False, _transposed_result=self.H_nn)
            # gemm(1.0, psit_nG, Htpsit_nG, 0.0, self.H_nn, 'c')

            # Add the dH_ii terms built from the projections.
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

            gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, :nbands] = self.H_nn

            # <psi2 | H | psi2>
            gd.integrate(psit2_nG, Htpsit_nG, global_integral=False, _transposed_result=self.H_nn)
            # r2k(0.5 * gd.dv, psit2_nG, Htpsit_nG, 0.0, self.H_nn)
            for a, P2_ni in P2_ani.items():
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

            gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, nbands:] = self.H_nn

            # Overlap matrix
            # <psi2 | S | psi>
            gd.integrate(psit_nG, psit2_nG, global_integral=False, _transposed_result=self.S_nn)
            # gemm(1.0, psit_nG, psit2_nG, 0.0, self.S_nn, 'c')

            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

            gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, :nbands] = self.S_nn

            # <psi2 | S | psi2>
            gd.integrate(psit2_nG, psit2_nG, global_integral=False, _transposed_result=self.S_nn)
            # rk(gd.dv, psit2_nG, 0.0, self.S_nn)
            for a, P2_ni in P2_ani.items():
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

            gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, nbands:] = self.S_nn

            # Only rank 0 of gd.comm diagonalizes; results are broadcast.
            if gd.comm.rank == 0:
                # m = number of eigenvalues of S below self.smin (0 when
                # smin is falsy, so the check is skipped).
                m = 0
                if self.smin:
                    s_N, U_NN = np.linalg.eigh(S_2n2n)
                    m = int((s_N < self.smin).sum())

                if m == 0:
                    general_diagonalize(H_2n2n, eps_2n, S_2n2n)
                else:
                    # Drop the m directions with the smallest eigenvalues of
                    # S: T_Nn = U[:, m:] * s[m:]**-0.5.
                    T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:] ** -0.5))
                    # Only the lower blocks were filled above; the full
                    # matrix product needs the upper-right block as well.
                    H_2n2n[:nbands, nbands:] = H_2n2n[nbands:, :nbands].conj().T
                    eps_2n[:-m], P_nn = np.linalg.eigh(np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                    # Store the eigenvectors as rows; the last m rows keep
                    # their previous content.
                    H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

            gd.comm.broadcast(H_2n2n, 0)
            gd.comm.broadcast(eps_2n, 0)

            kpt.eps_n[:] = eps_2n[:nbands]

            # Rotate psit_nG
            # H_2n2n[:nbands] is used as coefficients: the first call writes
            # the psit_nG contribution, the second adds the psit2_nG one
            # (presumably gemm(alpha, a, b, beta, c) -- TODO confirm).
            gd.gemm(1.0, psit_nG, H_2n2n[:nbands, :nbands], 0.0, Htpsit_nG)
            gd.gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:], 1.0, Htpsit_nG)
            # Swap names: the rotated states are now called psit_nG and the
            # old buffer becomes scratch.
            psit_nG, Htpsit_nG = Htpsit_nG, psit_nG

            # Rotate P_uni:
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                # A copy is passed as input because P_ni is also the output.
                gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
                gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

            # New residuals are only needed if another iteration follows.
            if nit < niter - 1:
                wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG, Htpsit_nG)
                R_nG = Htpsit_nG
                self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG, kpt.P_ani, kpt.eps_n, R_nG)

        self.timer.stop("Davidson")
        error = gd.comm.sum(error)
        return error, psit_nG
コード例 #9
0
 def _diagonalize(self, H_MM, S_MM, eps_M):
     """Serial diagonalize via LAPACK.

     Forwards to ``general_diagonalize(H_MM, eps_M, S_MM)``; note that the
     argument order differs from this method's signature.  Nothing is
     returned, so the results are presumably delivered through the
     arguments (eigenvalues in ``eps_M``) -- TODO confirm against
     ``general_diagonalize``.
     """
     # This is replicated computation but ultimately avoids
     # additional communication
     general_diagonalize(H_MM, eps_M, S_MM)
コード例 #10
0
ファイル: scalapack.py プロジェクト: eojons/gpaw-scme
def main(N=73, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Check ScaLAPACK results against serial LAPACK references.

    Random N x N matrices ``H0`` and ``S0`` are built on the master, solved
    there with the serial ``diagonalize``, ``general_diagonalize`` and
    ``inverse_cholesky`` routines, redistributed onto an 8 x 8 block layout
    and solved again with the divide-and-conquer ScaLAPACK drivers.  The
    maximum absolute differences are printed on the master and asserted to
    be below the module-level ``tol`` on every rank.

    N: matrix dimension.
    seed: seed for the random number generator.
    mprocs, nprocs: shape of the BLACS process grid.
    dtype: ``float`` or ``complex``.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Factor for the imaginary part added in the complex case.
    epsilon = 1.0j if dtype == complex else 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()

    # Serial reference eigenvalues
    W0 = np.empty(N, dtype=float)
    W0_g = np.empty(N, dtype=float)

    # Calculate reference results on the master; copies are passed because
    # H0 and S0 are redistributed further down.
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with 8 x 8 blocks:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on
    # the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.
    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W_dc = np.empty(N, dtype=float)
    W_g_dc = np.empty(N, dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten

    # Only the divide-and-conquer drivers are exercised.  The expert ("ex")
    # and "mr3" drivers are not tested anymore since there might be a
    # buffer overflow error.
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    scalapack_inverse_cholesky(dist, C, 'L')

    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)

    if rank == 0:
        diag_dc_err = abs(W_dc - W0).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()
        # Single-argument print calls give the same output on Python 2
        # and Python 3.
        print('diagonalize dc err %s' % diag_dc_err)
        print('general diagonalize dc err %s' % general_diag_dc_err)
        print('inverse chol err %s' % inverse_chol_err)
    else:
        diag_dc_err = 0.0
        general_diag_dc_err = 0.0
        inverse_chol_err = 0.0

    # We don't like exceptions on only one cpu
    diag_dc_err = world.sum(diag_dc_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    assert diag_dc_err < tol
    assert general_diag_dc_err < tol
    assert inverse_chol_err < tol
コード例 #11
0
 def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M,
                            UL='L', iu=None):
     """Solve the generalized eigenproblem with ``general_diagonalize``.

     Calls ``general_diagonalize(H_mm, eps_M, S_mm, iu=iu)`` and then
     copies the contents of ``H_mm`` into ``C_mm``.  ``UL`` is accepted
     but not used here.  Nothing is returned.
     """
     general_diagonalize(H_mm, eps_M, S_mm, iu=iu)
     # Presumably H_mm holds the eigenvectors after the call -- TODO confirm.
     C_mm[:] = H_mm
コード例 #12
0
ファイル: davidson.py プロジェクト: yihsuanliu/gpaw
    def iterate_one_k_point(self, hamiltonian, wfs, kpt):
        """Do Davidson iterations for the kpoint.

        After ``self.subspace_diagonalize`` the method runs ``self.niter``
        iterations.  In each one the preconditioned residuals ``psit2_nG``
        extend ``kpt.psit_nG`` to a space of dimension ``2 * nbands``; the
        diagonal and lower blocks of ``H_2n2n`` and ``S_2n2n`` are
        assembled, the generalized eigenproblem is solved on rank 0 of
        ``self.gd.comm`` and broadcast, and ``kpt.psit_nG``, ``kpt.eps_n``
        and ``kpt.P_ani`` are updated.

        Returns the weighted sum of residual norms accumulated in the last
        iteration, summed over ``self.gd.comm``.

        NOTE(review): ``error`` is only assigned inside the loop, so
        ``niter == 0`` would fail at the final ``self.gd.comm.sum(error)``.
        """
        niter = self.niter
        nbands = self.nbands

        self.subspace_diagonalize(hamiltonian, wfs, kpt)

        H_2n2n = self.H_2n2n
        S_2n2n = self.S_2n2n
        eps_2n = self.eps_2n
        # Storage for the correction vectors.
        psit2_nG = wfs.matrixoperator.suggest_temporary_buffer()

        self.timer.start('Davidson')
        # R_nG aliases self.Htpsit_nG; calculate_residuals is handed it as
        # the last argument.
        R_nG = self.Htpsit_nG
        self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                                 kpt.P_ani, kpt.eps_n, R_nG)

        for nit in range(niter):
            H_2n2n[:] = 0.0
            S_2n2n[:] = 0.0

            error = 0.0
            for n in range(nbands):
                # Weight of band n in the error: kpt.weight when there are
                # no occupations, otherwise f_n[n] ...
                if kpt.f_n is None:
                    weight = kpt.weight
                else:
                    weight = kpt.f_n[n]
                # ... unless a fixed number of bands is to be converged:
                # then those bands get kpt.weight and the rest get zero.
                if self.nbands_converge != 'occupied':
                    if n < self.nbands_converge:
                        weight = kpt.weight
                    else:
                        weight = 0.0
                error += weight * np.vdot(R_nG[n], R_nG[n]).real

                # psi-psi block: eps_n on the diagonal of H, ones on the
                # diagonal of S.
                H_2n2n[n,n] = kpt.eps_n[n]
                S_2n2n[n,n] = 1.0
                # Correction vector: preconditioned residual of band n.
                psit2_nG[n] = self.preconditioner(R_nG[n], kpt)

            # Calculate projections
            P2_ani = wfs.pt.dict(nbands)
            wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

            # Hamiltonian matrix
            # <psi2 | H | psi>
            # self.Htpsit_nG receives the kinetic and local-potential terms
            # applied to psit2_nG.
            wfs.kin.apply(psit2_nG, self.Htpsit_nG, kpt.phase_cd)
            hamiltonian.apply_local_potential(psit2_nG, self.Htpsit_nG, kpt.s)
            gemm(self.gd.dv, kpt.psit_nG, self.Htpsit_nG, 0.0, self.H_nn, 'c')

            # Add the dH_ii terms built from the projections.
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

            self.gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, :nbands] = self.H_nn

            # <psi2 | H | psi2>
            r2k(0.5 * self.gd.dv, psit2_nG, self.Htpsit_nG, 0.0, self.H_nn)
            for a, P2_ni in P2_ani.items():
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

            self.gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, nbands:] = self.H_nn

            # Overlap matrix
            # <psi2 | S | psi>
            gemm(self.gd.dv, kpt.psit_nG, psit2_nG, 0.0, self.S_nn, "c")

            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

            self.gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, :nbands] = self.S_nn

            # <psi2 | S | psi2>
            rk(self.gd.dv, psit2_nG, 0.0, self.S_nn)
            for a, P2_ni in P2_ani.items():
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

            self.gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, nbands:] = self.S_nn

            # Only rank 0 of gd.comm diagonalizes; results are broadcast.
            if self.gd.comm.rank == 0:
                general_diagonalize(H_2n2n, eps_2n, S_2n2n)

            self.gd.comm.broadcast(H_2n2n, 0)
            self.gd.comm.broadcast(eps_2n, 0)

            kpt.eps_n[:] = eps_2n[:nbands]

            # Rotate psit_nG
            # H_2n2n[:nbands] is used as coefficients: the first call writes
            # the psit_nG contribution, the second adds the psit2_nG one
            # (presumably gemm(alpha, a, b, beta, c) -- TODO confirm).
            gemm(1.0, kpt.psit_nG, H_2n2n[:nbands, :nbands],
                 0.0, self.Htpsit_nG)
            gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:],
                 1.0, self.Htpsit_nG)
            # Swap buffers: the rotated states become kpt.psit_nG and the
            # old array becomes scratch.
            kpt.psit_nG, self.Htpsit_nG = self.Htpsit_nG, kpt.psit_nG

            # Rotate P_uni:
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                # A copy is passed as input because P_ni is also the output.
                gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
                gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

            # New residuals are only needed if another iteration follows.
            if nit < niter - 1 :
                wfs.kin.apply(kpt.psit_nG, self.Htpsit_nG, kpt.phase_cd)
                hamiltonian.apply_local_potential(kpt.psit_nG, self.Htpsit_nG,
                                                  kpt.s)
                R_nG = self.Htpsit_nG
                self.calculate_residuals(kpt, wfs, hamiltonian, kpt.psit_nG,
                                         kpt.P_ani, kpt.eps_n, R_nG)

        self.timer.stop('Davidson')
        error = self.gd.comm.sum(error)
        return error
コード例 #13
0
ファイル: davidson.py プロジェクト: Huaguiyuan/gpawDFT
    def iterate_one_k_point(self, hamiltonian, wfs, kpt):
        """Do Davidson iterations for the kpoint"""
        niter = self.niter
        nbands = self.nbands
        mynbands = self.mynbands

        gd = wfs.matrixoperator.gd
        bd = self.operator.bd

        psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
        # Note that psit_nG is now in self.operator.work1_nG and
        # Htpsit_nG is in kpt.psit_nG!

        H_2n2n = self.H_2n2n
        S_2n2n = self.S_2n2n
        eps_2n = self.eps_2n

        self.timer.start('Davidson')

        if self.keep_htpsit:
            R_nG = Htpsit_nG
            psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)
        else:
            R_nG = wfs.empty(mynbands, q=kpt.q)
            psit2_nG = wfs.empty(mynbands, q=kpt.q)
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG, R_nG)
            wfs.pt.integrate(psit_nG, kpt.P_ani, kpt.q)
        
        self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                 kpt.P_ani, kpt.eps_n, R_nG)

        def integrate(a_G, b_G):
            return np.real(wfs.integrate(a_G, b_G, global_integral=False))

        # Note on band parallelization
        # The "large" H_2n2n and S_2n2n matrices are at the moment
        # global and replicated over band communicator, and the
        # general diagonalization is performed in serial i.e. without
        # scalapack

        for nit in range(niter):
            H_2n2n[:] = 0.0
            S_2n2n[:] = 0.0

            norm_n = np.zeros(mynbands)
            error = 0.0
            for n in range(mynbands):
                if kpt.f_n is None:
                    weight = kpt.weight
                else:
                    weight = kpt.f_n[n]
                if self.nbands_converge != 'occupied':
                    if n < self.nbands_converge:
                        weight = kpt.weight
                    else:
                        weight = 0.0
                error += weight * integrate(R_nG[n], R_nG[n])

                ekin = self.preconditioner.calculate_kinetic_energy(
                    psit_nG[n:n + 1], kpt)
                psit2_nG[n] = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

                if self.normalize:
                    norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

                N = bd.global_index(n)
                H_2n2n[N, N] = kpt.eps_n[n]
                S_2n2n[N, N] = 1.0

            bd.comm.sum(H_2n2n)
            bd.comm.sum(S_2n2n)

            if self.normalize:
                gd.comm.sum(norm_n)
                for norm, psit2_G in zip(norm_n, psit2_nG):
                    psit2_G *= norm**-0.5
        
            # Calculate projections
            P2_ani = wfs.pt.dict(mynbands)
            wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

            self.timer.start('calc. matrices')
            
            # Hamiltonian matrix
            # <psi2 | H | psi>

            def H(psit_xG):
                result_xG = R_nG
                wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_xG,
                                             result_xG)
                return result_xG

            def dH(a, P_ni):
                return np.dot(P_ni, unpack(hamiltonian.dH_asp[a][kpt.s]))

            H_nn = self.operator.calculate_matrix_elements(psit_nG, kpt.P_ani,
                                                           H, dH, psit2_nG,
                                                           P2_ani)

            H_2n2n[nbands:, :nbands] = H_nn

            # <psi2 | H | psi2>

            def H(psit_xG):
                # H | psi2 > already calculated in previous step
                result_xG = R_nG
                return result_xG

            def dH(a, P_ni):
                return np.dot(P_ni, unpack(hamiltonian.dH_asp[a][kpt.s]))

            H_nn = self.operator.calculate_matrix_elements(psit2_nG, P2_ani,
                                                           H, dH)

            H_2n2n[nbands:, nbands:] = H_nn

            # Overlap matrix
            # <psi2 | S | psi>

            def S(psit_G):
                return psit_G
            
            def dS(a, P_ni):
                return np.dot(P_ni, wfs.setups[a].dO_ii)

            S_nn = self.operator.calculate_matrix_elements(psit_nG, kpt.P_ani,
                                                           S, dS, psit2_nG,
                                                           P2_ani)

            S_2n2n[nbands:, :nbands] = S_nn

            # <psi2 | S | psi2>
            S_nn = self.operator.calculate_matrix_elements(psit2_nG, P2_ani,
                                                           S, dS)
            S_2n2n[nbands:, nbands:] = S_nn

            self.timer.stop('calc. matrices')

            self.timer.start('diagonalize')
            if gd.comm.rank == 0 and bd.comm.rank == 0:
                m = 0
                if self.smin:
                    s_N, U_NN = np.linalg.eigh(S_2n2n)
                    m = int((s_N < self.smin).sum())

                if m == 0:
                    general_diagonalize(H_2n2n, eps_2n, S_2n2n)
                else:
                    T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:]**-0.5))
                    H_2n2n[:nbands, nbands:] = \
                        H_2n2n[nbands:, :nbands].conj().T
                    eps_2n[:-m], P_nn = np.linalg.eigh(
                        np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                    H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

            gd.comm.broadcast(H_2n2n, 0)
            gd.comm.broadcast(eps_2n, 0)
            bd.comm.broadcast(H_2n2n, 0)
            bd.comm.broadcast(eps_2n, 0)

            self.operator.bd.distribute(eps_2n[:nbands], kpt.eps_n[:])

            self.timer.stop('diagonalize')

            self.timer.start('rotate_psi')
            # Rotate psit_nG

            # Memory references during rotate:
            # Case 1, no band parallelization:
            #   Before 1. matrix multiply: psit_nG -> operator.work1_xG
            #   After  1. matrix multiply: psit_nG -> R_nG
            #   After  2. matrix multiply: tmp_nG -> work1_xG
            #
            # Case 2, band parallelization
            # Work arrays used only in send/recv buffers,
            # psit_nG -> psit_nG
            # tmp_nG -> psit2_nG

            psit_nG = self.operator.matrix_multiply(H_2n2n[:nbands, :nbands],
                                                    psit_nG, kpt.P_ani,
                                                    out_nG=R_nG)

            tmp_nG = self.operator.matrix_multiply(H_2n2n[:nbands, nbands:],
                                                   psit2_nG, P2_ani)

            if bd.comm.size > 1:
                psit_nG += tmp_nG
            else:
                tmp_nG += psit_nG
                psit_nG, R_nG = tmp_nG, psit_nG
            
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                P_ni += P2_ni

            self.timer.stop('rotate_psi')

            if nit < niter - 1:
                wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG,
                                             R_nG)
                self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                         kpt.P_ani, kpt.eps_n, R_nG)

        self.timer.stop('Davidson')
        error = gd.comm.sum(error)
        return error, psit_nG
コード例 #14
0
    def iterate_one_k_point(self, hamiltonian, wfs, kpt):
        """Run the Davidson iterations for a single k-point.

        The method first calls ``self.subspace_diagonalize`` and then
        performs ``self.niter`` iterations.  Each iteration

        1. accumulates the weighted squared norm of the residuals into
           ``error`` and preconditions every residual into ``psit2_nG``,
        2. fills the lower blocks of the ``2*nbands x 2*nbands`` matrices
           ``H_2n2n`` and ``S_2n2n`` spanned by ``psit_nG`` and
           ``psit2_nG``,
        3. diagonalizes on rank 0 of ``gd.comm`` and broadcasts the result,
        4. stores the lowest ``nbands`` eigenvalues in ``kpt.eps_n`` and
           rotates ``psit_nG`` and the projections ``kpt.P_ani``.

        Parameters: ``hamiltonian`` supplies ``dH_asp``; ``wfs`` supplies
        the grid descriptor, projector functions, setups and the
        Hamiltonian application; ``kpt`` is the k-point object whose
        ``eps_n`` and ``P_ani`` are updated in place.

        Returns the tuple ``(error, psit_nG)`` where ``error`` has been
        summed over ``gd.comm``.

        NOTE(review): ``error`` is reset at the top of every iteration, so
        the returned value reflects the last iteration only.  It is bound
        only inside the loop, so ``niter == 0`` would fail at the final
        ``gd.comm.sum(error)`` with an unbound local.
        """
        niter = self.niter
        nbands = self.nbands

        gd = wfs.matrixoperator.gd

        psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
        # Note that psit_nG is now in self.operator.work1_nG and
        # Htpsit_nG is in kpt.psit_nG!

        H_2n2n = self.H_2n2n
        S_2n2n = self.S_2n2n
        eps_2n = self.eps_2n
        # Storage for the preconditioned residuals; built from
        # self.Htpsit_nG reshaped to the shape of psit_nG (presumably a
        # view on that buffer rather than a copy -- TODO confirm).
        psit2_nG = reshape(self.Htpsit_nG, psit_nG.shape)

        self.timer.start('Davidson')
        # R_nG is just another name for Htpsit_nG; calculate_residuals is
        # handed that same array as its last argument.
        R_nG = Htpsit_nG
        self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG, kpt.P_ani,
                                 kpt.eps_n, R_nG)

        def integrate(a_G, b_G):
            # Real part of the domain-local integral; the sum over
            # gd.comm is done once, on `error`, at the end of the method.
            return np.real(wfs.integrate(a_G, b_G, global_integral=False))

        for nit in range(niter):
            H_2n2n[:] = 0.0
            S_2n2n[:] = 0.0

            norm_n = np.zeros(nbands)
            error = 0.0
            for n in range(nbands):
                # Per-band weight of the residual in the error measure:
                # k-point weight when there are no occupations, otherwise
                # the occupation number of the band.
                if kpt.f_n is None:
                    weight = kpt.weight
                else:
                    weight = kpt.f_n[n]
                # Unless converging on 'occupied', override the weight: the
                # first nbands_converge bands get the k-point weight, the
                # remaining bands are ignored.
                if self.nbands_converge != 'occupied':
                    if n < self.nbands_converge:
                        weight = kpt.weight
                    else:
                        weight = 0.0
                error += weight * integrate(R_nG[n], R_nG[n])

                # Precondition the residual of band n; the result is the
                # n'th correction vector.
                ekin = self.preconditioner.calculate_kinetic_energy(
                    psit_nG[n:n + 1], kpt)
                psit2_nG[n] = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

                if self.normalize:
                    norm_n[n] = integrate(psit2_nG[n], psit2_nG[n])

                # Upper-left blocks: eps_n on the diagonal of H and the
                # identity for S.
                H_2n2n[n, n] = kpt.eps_n[n]
                S_2n2n[n, n] = 1.0

            if self.normalize:
                # The norms were integrated locally; sum them over the
                # domain before scaling each correction vector.
                gd.comm.sum(norm_n)
                for norm, psit2_G in zip(norm_n, psit2_nG):
                    psit2_G *= norm**-0.5

            # Calculate projections
            P2_ani = wfs.pt.dict(nbands)
            wfs.pt.integrate(psit2_nG, P2_ani, kpt.q)

            # Hamiltonian matrix
            # <psi2 | H | psi>
            # From here on Htpsit_nG holds H applied to psit2_nG; it is
            # reused for the <psi2 | H | psi2> block below.
            wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit2_nG, Htpsit_nG)
            gd.integrate(psit_nG,
                         Htpsit_nG,
                         global_integral=False,
                         _transposed_result=self.H_nn)
            # gemm(1.0, psit_nG, Htpsit_nG, 0.0, self.H_nn, 'c')

            # Add the atomic (dH_ii) corrections built from the projections.
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P_ni.T.conj()))

            # Presumably a reduction onto rank 0 of gd.comm, which is the
            # only rank that diagonalizes below -- TODO confirm.
            gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, :nbands] = self.H_nn

            # <psi2 | H | psi2>
            gd.integrate(psit2_nG,
                         Htpsit_nG,
                         global_integral=False,
                         _transposed_result=self.H_nn)
            # r2k(0.5 * gd.dv, psit2_nG, Htpsit_nG, 0.0, self.H_nn)
            for a, P2_ni in P2_ani.items():
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                self.H_nn += np.dot(P2_ni, np.dot(dH_ii, P2_ni.T.conj()))

            gd.comm.sum(self.H_nn, 0)
            H_2n2n[nbands:, nbands:] = self.H_nn

            # Overlap matrix
            # <psi2 | S | psi>
            gd.integrate(psit_nG,
                         psit2_nG,
                         global_integral=False,
                         _transposed_result=self.S_nn)
            # gemm(1.0, psit_nG, psit2_nG, 0.0, self.S_nn, 'c')

            # Add the atomic (dO_ii) corrections built from the projections.
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.inner(dO_ii, P_ni.conj()))

            gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, :nbands] = self.S_nn

            # <psi2 | S | psi2>
            gd.integrate(psit2_nG,
                         psit2_nG,
                         global_integral=False,
                         _transposed_result=self.S_nn)
            # rk(gd.dv, psit2_nG, 0.0, self.S_nn)
            for a, P2_ni in P2_ani.items():
                dO_ii = wfs.setups[a].dO_ii
                self.S_nn += np.dot(P2_ni, np.dot(dO_ii, P2_ni.T.conj()))

            gd.comm.sum(self.S_nn, 0)
            S_2n2n[nbands:, nbands:] = self.S_nn

            # Only rank 0 of gd.comm diagonalizes; the other ranks receive
            # the result through the broadcasts below.
            if gd.comm.rank == 0:
                # m counts the eigenvalues of S_2n2n below self.smin; it
                # stays 0 when self.smin is falsy.
                m = 0
                if self.smin:
                    s_N, U_NN = np.linalg.eigh(S_2n2n)
                    m = int((s_N < self.smin).sum())

                if m == 0:
                    # The upper-right block of H_2n2n is still zero here, so
                    # general_diagonalize presumably reads the lower
                    # triangle only and leaves the eigenvectors in H_2n2n
                    # -- TODO confirm.
                    general_diagonalize(H_2n2n, eps_2n, S_2n2n)
                else:
                    # Drop the m smallest overlap eigenvalues: T_Nn maps to
                    # the remaining eigenvectors of S scaled by s**-0.5.
                    T_Nn = np.dot(U_NN[:, m:], np.diag(s_N[m:]**-0.5))
                    # Fill in the upper-right block so that the full matrix
                    # can be used in the products below.
                    H_2n2n[:nbands, nbands:] = \
                        H_2n2n[nbands:, :nbands].conj().T
                    # Only the first 2*nbands - m entries of eps_2n and rows
                    # of H_2n2n are overwritten; the last m keep their
                    # previous contents.
                    eps_2n[:-m], P_nn = np.linalg.eigh(
                        np.dot(np.dot(T_Nn.T.conj(), H_2n2n), T_Nn))
                    H_2n2n[:-m] = np.dot(T_Nn, P_nn).T

            gd.comm.broadcast(H_2n2n, 0)
            gd.comm.broadcast(eps_2n, 0)

            kpt.eps_n[:] = eps_2n[:nbands]

            # Rotate psit_nG
            # The first nbands rows of H_2n2n are used as coefficients: the
            # left half multiplies psit_nG and the right half psit2_nG.  The
            # result is accumulated in Htpsit_nG, after which the two names
            # are swapped so that psit_nG refers to the rotated vectors.
            gd.gemm(1.0, psit_nG, H_2n2n[:nbands, :nbands], 0.0, Htpsit_nG)
            gd.gemm(1.0, psit2_nG, H_2n2n[:nbands, nbands:], 1.0, Htpsit_nG)
            psit_nG, Htpsit_nG = Htpsit_nG, psit_nG

            # Rotate P_uni:
            for a, P_ni in kpt.P_ani.items():
                P2_ni = P2_ani[a]
                # P_ni is the output buffer, hence the copy used as input.
                gemm(1.0, P_ni.copy(), H_2n2n[:nbands, :nbands], 0.0, P_ni)
                gemm(1.0, P2_ni, H_2n2n[:nbands, nbands:], 1.0, P_ni)

            # Residuals are needed only if another iteration follows.
            if nit < niter - 1:
                wfs.apply_pseudo_hamiltonian(kpt, hamiltonian, psit_nG,
                                             Htpsit_nG)
                R_nG = Htpsit_nG
                self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                                         kpt.P_ani, kpt.eps_n, R_nG)

        self.timer.stop('Davidson')
        # `error` was integrated per domain; make it global.
        error = gd.comm.sum(error)
        return error, psit_nG
コード例 #15
0
ファイル: matrix_descriptor.py プロジェクト: eojons/gpaw-scme
 def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M, UL='L'):
     """Solve the generalized eigenproblem serially and store it in C_mm.

     Forwards ``H_mm``, ``eps_M`` and ``S_mm`` to ``general_diagonalize``
     and then copies the contents of ``H_mm`` into ``C_mm`` in place.
     ``general_diagonalize`` presumably overwrites ``H_mm`` with the
     eigenvectors and ``eps_M`` with the eigenvalues -- confirm against
     its definition.

     ``UL`` is accepted but not used by this implementation.
     """
     # The solver works in place on H_mm / eps_M; nothing is returned.
     general_diagonalize(H_mm, eps_M, S_mm)
     # Slice assignment keeps the caller's C_mm array object.
     C_mm[:] = H_mm
コード例 #16
0
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare ScaLAPACK routines with serial results computed on rank 0.

    Random N x N matrices H0 and S0 are built, and reference results
    (eigenvalues, generalized eigenvalues, inverse Cholesky factor and
    inverse) are computed serially on rank 0.  The matrices are then
    redistributed onto a BLACS grid and the divide-and-conquer ScaLAPACK
    drivers, the inverse Cholesky and (for complex dtype only) the inverse
    are run.  The maximum absolute deviations from the serial references
    are asserted to be below ``tol``.

    Parameters:
        N: dimension of the square test matrices.
        seed: seed for ``np.random.RandomState``.
        mprocs, nprocs: passed to ``BlacsGrid`` together with ``world``.
        dtype: ``float`` or ``complex``; complex adds an imaginary part to
            the matrices and enables the inverse test.

    The names ``world``, ``rank`` and ``tol`` are not defined in this
    function and are taken from the enclosing module.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Scale of the imaginary part added below; zero for real dtype.
    if (dtype == complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        # Both serial references below start from a copy of S0.
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create the distributed descriptor (the trailing 8, 8 are presumably
    # the block sizes -- confirm against BlacsGrid.new_descriptor):
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)
    # NOTE(review): Sinv is allocated a second time here; the array
    # created a few lines above is simply discarded.
    Sinv = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    # Only W_dc and W_g_dc are filled below; the other four buffers are
    # referenced solely by the commented-out expert/MR3 driver calls.
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    # Errors are measured on rank 0 only; the other ranks contribute 0.0
    # to the sums further down.
    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        # Computed for every dtype, but for real dtype Sinv was never
        # inverted; the value is printed and asserted for complex only.
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    # Summing over world gives every rank the same error values, so the
    # asserts below pass or fail on all ranks together.
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol