def mpiverify(data, id):
    # Do some debugging when running on two procs XXX REMOVE
    if world.size == 2:
        if world.rank == 0:
            temp = -data.copy()
        else:
            temp = data.copy()
        world.sum(temp)
        err = sum(abs(temp).ravel()**2)
        if err > 1e-10:
            if world.rank == 0:
                print('Parallel assert failed: ', id, ' norm: ',
                      sum(temp.ravel()**2))
            print('Data from proc ', world.rank)
            print(data)
            assert False
Beispiel #2
0
    def test_addition_theorem(self):
        lmax = 9

        # Test that the complex spherical harmonic addition theorem holds
        thetam_L = np.random.uniform(0, np.pi, size=theta_L.shape)
        world.broadcast(thetam_L, 0)
        phim_L = np.random.uniform(0, 2*np.pi, size=phi_L.shape)
        world.broadcast(phim_L, 0)
        cosv_L = np.cos(theta_L)*np.cos(thetam_L) \
            + np.sin(theta_L)*np.sin(thetam_L)*np.cos(phi_L-phim_L)
        P0_lL = np.array([legendre(l, 0, cosv_L) for l in range(lmax+1)])
        P_lL = np.zeros_like(P0_lL)
        for l,m in lmiter(lmax, comm=world):
            P_lL[l] += 4 * np.pi / (2*l + 1.) * Y(l, m, theta_L, phi_L) \
                * Y(l, m, thetam_L, phim_L).conj()
        world.sum(P_lL)
        self.assertAlmostEqual(np.abs(P_lL-P0_lL).max(), 0, 6)
Beispiel #3
0
    def test_addition_theorem(self):
        lmax = 9

        # Test that the complex spherical harmonic addition theorem holds
        thetam_L = np.random.uniform(0, np.pi, size=theta_L.shape)
        world.broadcast(thetam_L, 0)
        phim_L = np.random.uniform(0, 2 * np.pi, size=phi_L.shape)
        world.broadcast(phim_L, 0)
        cosv_L = np.cos(theta_L)*np.cos(thetam_L) \
            + np.sin(theta_L)*np.sin(thetam_L)*np.cos(phi_L-phim_L)
        P0_lL = np.array([legendre(l, 0, cosv_L) for l in range(lmax + 1)])
        P_lL = np.zeros_like(P0_lL)
        for l, m in lmiter(lmax, comm=world):
            P_lL[l] += 4 * np.pi / (2*l + 1.) * Y(l, m, theta_L, phi_L) \
                * Y(l, m, thetam_L, phim_L).conj()
        world.sum(P_lL)
        self.assertAlmostEqual(np.abs(P_lL - P0_lL).max(), 0, 6)
Beispiel #4
0
    def test_multipole_expansion(self):
        lmax = 9
        R = 1.0
        npts = 1000
        tol = 1e-9

        # Solve ((R-dR)/(R+dR))**(lmax+1) = tol for dR
        dR = R * (1 - tol**(1. / (lmax + 1))) / (1 + tol**(1. / (lmax + 1)))
        assert abs(((R - dR) / (R + dR))**(lmax + 1) - tol) < 1e-12

        # Test multipole expansion of 1/|r-r'| in complex spherical harmonics
        r_g = np.random.uniform(R + dR, 10 * R, size=npts)
        world.broadcast(r_g, 0)
        theta_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(theta_g, 0)
        phi_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(phi_g, 0)

        r_vg = np.empty((3, npts), dtype=float)
        r_vg[0] = r_g * np.cos(phi_g) * np.sin(theta_g)
        r_vg[1] = r_g * np.sin(phi_g) * np.sin(theta_g)
        r_vg[2] = r_g * np.cos(theta_g)

        rm_g = np.random.uniform(0, R - dR, size=npts)
        world.broadcast(rm_g, 0)
        thetam_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(thetam_g, 0)
        phim_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(phim_g, 0)

        rm_vg = np.empty((3, npts), dtype=float)
        rm_vg[0] = rm_g * np.cos(phim_g) * np.sin(thetam_g)
        rm_vg[1] = rm_g * np.sin(phim_g) * np.sin(thetam_g)
        rm_vg[2] = rm_g * np.cos(thetam_g)

        f0_g = np.sum((r_vg - rm_vg)**2, axis=0)**(-0.5)
        f_g = np.zeros_like(f0_g)

        for l, m in lmiter(lmax, comm=world):
            f_g += 4 * np.pi / (2*l + 1.) * r_g**(-1) * (rm_g/r_g)**l \
                * Y(l, m, theta_g, phi_g) * Y(l, m, thetam_g, phim_g).conj()
        world.sum(f_g)

        e = np.abs(f_g - f0_g).max()
        self.assertAlmostEqual(e, 0, 9)
Beispiel #5
0
    def test_multipole_expansion(self):
        lmax = 9
        R = 1.0
        npts = 1000
        tol = 1e-9

        # Solve ((R-dR)/(R+dR))**(lmax+1) = tol for dR
        dR = R * (1 - tol**(1./(lmax+1))) / (1 + tol**(1./(lmax+1)))
        assert abs(((R-dR)/(R+dR))**(lmax+1) - tol) < 1e-12

        # Test multipole expansion of 1/|r-r'| in complex spherical harmonics
        r_g = np.random.uniform(R+dR, 10*R, size=npts)
        world.broadcast(r_g, 0)
        theta_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(theta_g, 0)
        phi_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(phi_g, 0)

        r_vg = np.empty((3, npts), dtype=float)
        r_vg[0] = r_g*np.cos(phi_g)*np.sin(theta_g)
        r_vg[1] = r_g*np.sin(phi_g)*np.sin(theta_g)
        r_vg[2] = r_g*np.cos(theta_g)

        rm_g = np.random.uniform(0, R-dR, size=npts)
        world.broadcast(rm_g, 0)
        thetam_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(thetam_g, 0)
        phim_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(phim_g, 0)

        rm_vg = np.empty((3, npts), dtype=float)
        rm_vg[0] = rm_g*np.cos(phim_g)*np.sin(thetam_g)
        rm_vg[1] = rm_g*np.sin(phim_g)*np.sin(thetam_g)
        rm_vg[2] = rm_g*np.cos(thetam_g)

        f0_g = np.sum((r_vg-rm_vg)**2, axis=0)**(-0.5)
        f_g = np.zeros_like(f0_g)

        for l,m in lmiter(lmax, comm=world):
            f_g += 4 * np.pi / (2*l + 1.) * r_g**(-1) * (rm_g/r_g)**l \
                * Y(l, m, theta_g, phi_g) * Y(l, m, thetam_g, phim_g).conj()
        world.sum(f_g)

        e = np.abs(f_g-f0_g).max()
        self.assertAlmostEqual(e, 0, 9)
Beispiel #6
0
def parallel_eigh(matrixfile, blacsgrid=(4, 2), blocksize=64):
    """Diagonalize matrix in parallel"""
    assert np.prod(blacsgrid) == world.size
    grid = BlacsGrid(world, *blacsgrid)

    if world.rank == MASTER:
        H_MM = np.load(matrixfile)
        assert H_MM.ndim == 2
        assert H_MM.shape[0] == H_MM.shape[1]
        NM = len(H_MM)
    else:
        NM = 0
    NM = world.sum(NM) # Distribute matrix shape to all nodes

    # descriptor for the individual blocks
    block_desc = grid.new_descriptor(NM, NM, blocksize, blocksize)

    # descriptor for global array on MASTER
    local_desc = grid.new_descriptor(NM, NM, NM, NM)

    # Make some dummy array on all the slaves
    if world.rank != MASTER:
        H_MM = local_desc.zeros()
    assert local_desc.check(H_MM)

    # The local version of the matrix
    H_mm = block_desc.empty()

    # Distribute global array to smaller blocks
    redistributor = Redistributor(world, local_desc, block_desc)
    redistributor.redistribute(H_MM, H_mm)

    # Allocate arrays for eigenvalues and -vectors
    eps_M = np.empty(NM)
    C_mm = block_desc.empty()
    block_desc.diagonalize_ex(H_mm, C_mm, eps_M)

    # Collect eigenvectors on MASTER
    C_MM = local_desc.empty()
    redistributor2 = Redistributor(world, block_desc, local_desc)
    redistributor2.redistribute(C_mm, C_MM)

    # Return eigenvalues and -vectors on Master
    if world.rank == MASTER:
        return eps_M, C_MM
    else:
        return None, None
Beispiel #7
0
def parallel_eigh(matrixfile, blacsgrid=(4, 2), blocksize=64):
    """Diagonalize matrix in parallel"""
    assert np.prod(blacsgrid) == world.size
    grid = BlacsGrid(world, *blacsgrid)

    if world.rank == MASTER:
        H_MM = np.load(matrixfile)
        assert H_MM.ndim == 2
        assert H_MM.shape[0] == H_MM.shape[1]
        NM = len(H_MM)
    else:
        NM = 0
    NM = world.sum(NM) # Distribute matrix shape to all nodes

    # descriptor for the individual blocks
    block_desc = grid.new_descriptor(NM, NM, blocksize, blocksize)

    # descriptor for global array on MASTER
    local_desc = grid.new_descriptor(NM, NM, NM, NM)

    # Make some dummy array on all the slaves
    if world.rank != MASTER:
        H_MM = local_desc.zeros()
    assert local_desc.check(H_MM)

    # The local version of the matrix
    H_mm = block_desc.empty()

    # Distribute global array to smaller blocks
    redistributor = Redistributor(world, local_desc, block_desc)
    redistributor.redistribute(H_MM, H_mm)

    # Allocate arrays for eigenvalues and -vectors
    eps_M = np.empty(NM)
    C_mm = block_desc.empty()
    block_desc.diagonalize_ex(H_mm, C_mm, eps_M)

    # Collect eigenvectors on MASTER
    C_MM = local_desc.empty()
    redistributor2 = Redistributor(world, block_desc, local_desc)
    redistributor2.redistribute(C_mm, C_MM)

    # Return eigenvalues and -vectors on Master
    if world.rank == MASTER:
        return eps_M, C_MM
    else:
        return None, None
Beispiel #8
0
    def calculate_exx(self):
        """Non-selfconsistent calculation."""
        kd = self.kd
        K = len(kd.bzk_kc)
        W = world.size // self.nspins
        parallel = (W > 1)

        self.exx = 0.0
        self.exx_kq = np.zeros((K, len(self.ibzq_qc)), float)

        for s in range(self.nspins):
            ibz_kpts = [KPoint(kd, kpt) for kpt in self.kpt_u if kpt.s == s]
            for ik, kpt in enumerate(kd.bzk_kc):
                print('K %s %s ...' % (ik, kpt), file=self.txt)
                for iq, q in enumerate(self.ibzq_qc):
                    kpq = kd.find_k_plus_q(q, kpts_k=[ik])
                    self.apply(ibz_kpts[kd.bz2ibz_k[ik]],
                               ibz_kpts[kd.bz2ibz_k[kpq[0]]], ik, kpq[0], iq)

        self.exx = world.sum(self.exx)
        self.exx += self.calculate_exx_paw_correction()

        exx_q = np.sum(self.exx_kq, 0)

        print(file=self.txt)
        print('------------------------------------------------------',
              file=self.txt)
        print(file=self.txt)
        print('Contributions: q         w        E_q (eV)', file=self.txt)
        for q in range(len(exx_q)):
            print('[%1.3f %1.3f %1.3f]    %1.3f   %s' % \
                  (self.ibzq_qc[q][0], self.ibzq_qc[q][1], self.ibzq_qc[q][2],
                   self.q_weights[q]/len(self.bzq_qc),
                   exx_q[q]/self.q_weights[q]*len(self.bzq_qc)*Ha), file=self.txt)
        print('E_EXX = %s eV' % (self.exx * Ha), file=self.txt)
        print(file=self.txt)
        print('Calculation completed at:  ', ctime(), file=self.txt)
        print(file=self.txt)
        print('------------------------------------------------------',
              file=self.txt)
        print(file=self.txt)
Beispiel #9
0
    def calculate_exx(self):
        """Non-selfconsistent calculation."""
        kd = self.kd
        K = len(kd.bzk_kc)
        W = world.size // self.nspins
        parallel = W > 1

        self.exx = 0.0
        self.exx_kq = np.zeros((K, len(self.ibzq_qc)), float)

        for s in range(self.nspins):
            ibz_kpts = [KPoint(kd, kpt) for kpt in self.kpt_u if kpt.s == s]
            for ik, kpt in enumerate(kd.bzk_kc):
                print >>self.txt, "K %s %s ..." % (ik, kpt)
                for iq, q in enumerate(self.ibzq_qc):
                    kpq = kd.find_k_plus_q(q, kpts_k=[ik])
                    self.apply(ibz_kpts[kd.bz2ibz_k[ik]], ibz_kpts[kd.bz2ibz_k[kpq[0]]], ik, kpq[0], iq)

        self.exx = world.sum(self.exx)
        self.exx += self.calculate_exx_paw_correction()

        exx_q = np.sum(self.exx_kq, 0)

        print >>self.txt
        print >>self.txt, "------------------------------------------------------"
        print >>self.txt
        print >>self.txt, "Contributions: q         w        E_q (eV)"
        for q in range(len(exx_q)):
            print >>self.txt, "[%1.3f %1.3f %1.3f]    %1.3f   %s" % (
                self.ibzq_qc[q][0],
                self.ibzq_qc[q][1],
                self.ibzq_qc[q][2],
                self.q_weights[q] / len(self.bzq_qc),
                exx_q[q] / self.q_weights[q] * len(self.bzq_qc) * Ha,
            )
        print >>self.txt, "E_EXX = %s eV" % (self.exx * Ha)
        print >>self.txt
        print >>self.txt, "Calculation completed at:  ", ctime()
        print >>self.txt
        print >>self.txt, "------------------------------------------------------"
        print >>self.txt
Beispiel #10
0
    def calculate_exx(self):
        """Non-selfconsistent calculation."""

        kd = self.kd
        K = self.fullkd.nibzkpts
        assert self.nspins == 1
        Q = K // world.size
        assert Q * world.size == K
        parallel = (world.size > self.nspins)
        
        self.exx = 0.0
        self.exx_skn = np.zeros((self.nspins, K, self.bd.nbands))

        kpt_u = []
        for k in range(world.rank * Q, (world.rank + 1) * Q):
            k_c = self.fullkd.ibzk_kc[k]
            for k1, k1_c in enumerate(kd.bzk_kc):
                if abs(k1_c - k_c).max() < 1e-10:
                    break
                
            # Index of symmetry related point in the irreducible BZ
            ik = kd.kibz_k[k1]
            kpt = self.kpt_u[ik]

            # KPoint from ground-state calculation
            phase_cd = np.exp(2j * pi * self.gd.sdisp_cd * k_c[:, np.newaxis])
            kpt2 = KPoint0(kpt.weight, kpt.s, k, None, phase_cd)
            kpt2.psit_nG = np.empty_like(kpt.psit_nG)
            kpt2.f_n = kpt.f_n / kpt.weight / K * 2
            for n, psit_G in enumerate(kpt2.psit_nG):
                psit_G[:] = kd.transform_wave_function(kpt.psit_nG[n], k1)

            kpt2.P_ani = self.pt.dict(len(kpt.psit_nG))
            self.pt.integrate(kpt2.psit_nG, kpt2.P_ani, k)
            kpt_u.append(kpt2)

        for s in range(self.nspins):
            kpt1_q = [KPoint(self.fullkd, kpt) for kpt in kpt_u if kpt.s == s]
            kpt2_q = kpt1_q[:]

            if len(kpt1_q) == 0:
                # No s-spins on this CPU:
                continue

            # Send rank:
            srank = self.fullkd.get_rank_and_index(s, (kpt1_q[0].k - 1) % K)[0]

            # Receive rank:
            rrank = self.fullkd.get_rank_and_index(s, (kpt1_q[-1].k + 1) % K)[0]

            # Shift k-points K // 2 times:
            for i in range(K // 2 + 1):
                if i < K // 2:
                    if parallel:
                        kpt = kpt2_q[-1].next()
                        kpt.start_receiving(rrank)
                        kpt2_q[0].start_sending(srank)
                    else:
                        kpt = kpt2_q[0]

                for kpt1, kpt2 in zip(kpt1_q, kpt2_q):
                    if 2 * i == K:
                        self.apply(kpt1, kpt2, invert=(kpt1.k > kpt2.k))
                    else:
                        self.apply(kpt1, kpt2)
                        self.apply(kpt1, kpt2, invert=True)

                if i < K // 2:
                    if parallel:
                        kpt.wait()
                        kpt2_q[0].wait()
                    kpt2_q.pop(0)
                    kpt2_q.append(kpt)
            
        self.exx = world.sum(self.exx)
        world.sum(self.exx_skn)
        self.exx += self.calculate_paw_correction()
Beispiel #11
0
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if (dtype == complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed destriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol
Beispiel #12
0
def parallel_transport(calc,
                       direction=0,
                       spinors=True,
                       name=None,
                       scale=1.0,
                       bands=None,
                       theta=0.0,
                       phi=0.0):

    if isinstance(calc, str):
        calc = GPAW(calc, txt=None, communicator=serial_comm)

    if bands is None:
        nv = int(calc.get_number_of_electrons())
        bands = range(nv)

    cell_cv = calc.wfs.gd.cell_cv
    icell_cv = (2 * np.pi) * np.linalg.inv(cell_cv).T
    r_g = calc.wfs.gd.get_grid_point_coordinates()
    Ng = np.prod(np.shape(r_g)[1:]) * (spinors + 1)

    dO_aii = []
    for ia in calc.wfs.kpt_u[0].P_ani.keys():
        dO_ii = calc.wfs.setups[ia].dO_ii
        if spinors:
            # Spinor projections require doubling of the (identical) orbitals
            dO_jj = np.zeros((2 * len(dO_ii), 2 * len(dO_ii)), complex)
            dO_jj[::2, ::2] = dO_ii
            dO_jj[1::2, 1::2] = dO_ii
            dO_aii.append(dO_jj)
        else:
            dO_aii.append(dO_ii)

    N_c = calc.wfs.kd.N_c
    assert 1 in np.delete(N_c, direction)
    Nkx = N_c[0]
    Nky = N_c[1]
    Nkz = N_c[2]

    Nk = Nkx * Nky * Nkz
    Nloc = N_c[direction]
    Npar = Nk // Nloc

    # Parallelization stuff
    myKsize = -(-Npar // (world.size))
    myKrange = range(rank * myKsize, min((rank + 1) * myKsize, Npar))
    myKsize = len(myKrange)

    # Get array of k-point indices of the path. q index is loc direction
    kpts_kq = []
    for k in range(Npar):
        if direction == 0:
            kpts_kq.append(list(range(k, Nkx * Nky, Nky)))
        if direction == 1:
            if Nkz == 1:
                kpts_kq.append(list(range(k * Nky, (k + 1) * Nky)))
            else:
                kpts_kq.append(list(range(k, Nkz * Nky, Nkz)))
        if direction == 2:
            kpts_kq.append(list(range(k * Nloc, (k + 1) * Nloc)))

    G_c = np.array([0, 0, 0])
    G_c[direction] = 1
    G_v = np.dot(G_c, icell_cv)

    kpts_kc = calc.get_bz_k_points()
    kpts_kv = np.dot(kpts_kc, icell_cv)
    if Nloc > 1:
        b_c = kpts_kc[kpts_kq[0][1]] - kpts_kc[kpts_kq[0][0]]
        b_v = np.dot(b_c, icell_cv)
    else:
        b_v = G_v

    e_mk, v_knm = get_spinorbit_eigenvalues(calc,
                                            return_wfs=True,
                                            scale=scale,
                                            theta=theta,
                                            phi=phi)

    phi_km = np.zeros((Npar, len(bands)), float)
    S_km = np.zeros((Npar, len(bands)), float)
    # Loop over the direction parallel components
    for k in myKrange:
        U_qmm = [np.eye(len(bands))]
        print(k)
        qpts_q = kpts_kq[k]
        # Loop over kpoints in the phase direction
        for q in range(Nloc - 1):
            iq1 = qpts_q[q]
            iq2 = qpts_q[q + 1]
            # print(kpts_kc[iq1], kpts_kc[iq2])
            if q == 0:
                u1_nsG = get_spinorbit_wavefunctions(calc, iq1,
                                                     v_knm[iq1])[bands]
                # Transform from psi-like to u-like
                u1_nsG[:] *= np.exp(-1.0j *
                                    gemmdot(kpts_kv[iq1], r_g, beta=0.0))
                P1_ani = get_spinorbit_projections(calc, iq1, v_knm[iq1])

            u2_nsG = get_spinorbit_wavefunctions(calc, iq2, v_knm[iq2])[bands]
            u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq2], r_g, beta=0.0))
            P2_ani = get_spinorbit_projections(calc, iq2, v_knm[iq2])

            M_mm = get_overlap(calc, bands,
                               np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                               np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                               P2_ani, dO_aii, b_v)
            V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
            U_mm = np.dot(V_mm, W_mm).conj()
            u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
            u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
            u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
            u2_nsG = u_nsxyz
            for a in range(len(calc.atoms)):
                P2_ni = P2_ani[a][bands]
                P2_ni = np.dot(U_mm, P2_ni)
                P2_ani[a][bands] = P2_ni
            U_qmm.append(U_mm)
            u1_nsG = u2_nsG
            P1_ani = P2_ani
        U_qmm = np.array(U_qmm)

        # Fix phases for last point
        iq0 = qpts_q[0]
        if Nloc == 1:
            u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
            u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
            P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        u2_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(G_v, r_g, beta=0.0))
        P2_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(-1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        M_mm = get_overlap(calc, bands, np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                           P2_ani, dO_aii, b_v)
        V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
        U_mm = np.dot(V_mm, W_mm).conj()
        u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
        u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
        u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
        u2_nsG = u_nsxyz
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            P2_ni = np.dot(U_mm, P2_ni)
            P2_ani[a][bands] = P2_ni

        # Get overlap between first kpts and its smoothly translated image
        u2_nsG[:] *= np.exp(1.0j * gemmdot(G_v, r_g, beta=0.0))
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        M_mm = get_overlap(calc, bands, np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                           P2_ani, dO_aii, np.array([0.0, 0.0, 0.0]))
        l_m, l_mm = np.linalg.eig(M_mm)
        phi_km[k] = np.angle(l_m)
        print(phi_km[k] / 2 / np.pi)

        A_mm = np.zeros_like(l_mm, complex)
        for q in range(Nloc):
            iq = qpts_q[q]
            U_mm = U_qmm[q]
            v_nm = U_mm.dot(v_knm[iq][:, bands].T).T
            A_mm += np.dot(v_nm[::2].T.conj(), v_nm[::2])
            A_mm -= np.dot(v_nm[1::2].T.conj(), v_nm[1::2])
        A_mm /= Nloc
        S_km[k] = np.diag(l_mm.T.conj().dot(A_mm).dot(l_mm)).real

    world.sum(phi_km)
    world.sum(S_km)

    np.savez('phases_%s.npz' % name, phi_km=phi_km, S_km=S_km)
Beispiel #13
0
def calculate_Kxc(pd, calc, functional='ALDA', density_cut=None):
    """ALDA kernel"""

    gd = pd.gd
    npw = pd.ngmax
    nG = pd.gd.N_c
    vol = pd.gd.volume
    G_Gv = pd.get_reciprocal_vectors()
    nt_sG = calc.density.nt_sG
    R_av = calc.atoms.positions / Bohr
    setups = calc.wfs.setups
    D_asp = calc.density.D_asp

    # The soft part
    # assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG, iQ in enumerate(pd.Q_qG[0]):
            iQ_c = (np.unravel_index(iQ, nG) + nG // 2) % nG - nG // 2
            for jG, jQ in enumerate(pd.Q_qG[0]):
                jQ_c = (np.unravel_index(jQ, nG) + nG // 2) % nG - nG // 2
                ijQ_c = (iQ_c - jQ_c)
                if (abs(ijQ_c) < nG // 2).all():
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][tuple(ijQ_c)]

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for v in range(3):
        dG_GGv[:, :, v] = np.subtract.outer(G_Gv[:, v], G_Gv[:, v])

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x *
                                                 (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(-1j * np.inner(dG_GGv, R_nv[n]) *
                                     rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] - ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    if pd.kd.gamma:
        Kxc_sGG[:, 0, :] = 0.0
        Kxc_sGG[:, :, 0] = 0.0

    return Kxc_sGG / vol
Beispiel #14
0
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol, bcell_cv, R_av, setups,
                  D_asp):
    """LDA kernel"""

    # The soft part
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    xc = XC('LDA')

    fxc_sg = np.zeros_like(nt_sG)
    xc.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_g = np.fft.fftn(fxc_g) * vol / nG0

    r_vg = gd.get_grid_point_coordinates()

    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            if (nG / 2 - np.abs(dG_c) > 0).all():
                index = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = tmp_g[index[0], index[1], index[2]]
            else:  # not in the fft index
                dG_v = np.dot(dG_c, bcell_cv)
                dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)

    KxcPAW_GG = np.zeros_like(Kxc_GG)
    # The PAW part
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)
            assert nspins == 1

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)
            # Add core density
            n_sLg[:, 0] += sqrt(4 * pi) / nspins * nc_g
            nt_sLg[:, 0] += sqrt(4 * pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))

            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                xc.calculate_fxc(rgd, nt_sg, ft_sg)

                coef_GGg = np.exp(
                    -1j *
                    np.outer(np.inner(dG_GGv, R_nv[n]), rgd.r_g)).reshape(
                        npw, npw, rgd.ng)
                KxcPAW_GG += w * np.dot(
                    coef_GGg, (f_sg[0] - ft_sg[0]) * dv_g) * coefatoms_GG
    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG

    return Kxc_GG / vol
Beispiel #15
0
from ase.structure import molecule
from gpaw import GPAW
from gpaw.wavefunctions.pw import PW
from gpaw.mpi import world

a = molecule('H', pbc=1)
a.center(vacuum=2)

comm = world.new_communicator([0])
e0 = 0.0
if world.rank == 0:
    a.calc = GPAW(mode=PW(250),
                  communicator=comm,
                  txt=None)
    e0 = a.get_potential_energy()
e0 = world.sum(e0)

a.calc = GPAW(mode=PW(250),
              eigensolver='rmm-diis',
              basis='szp(dzp)',
              txt='%d.txt' % world.size)
e = a.get_potential_energy()
f = a.get_forces()
assert abs(e - e0) < 7e-5, abs(e - e0)
assert abs(f).max() < 1e-10, abs(f).max()

Beispiel #16
0
    if Fref is not None:
        Ferr = np.abs(F - Fref).max()
        assert Ferr < 1e-6, 'Bad F: err=%f; parallel=%s' % (Ferr, parallel)
    return E, F

# First calculate reference energy and forces E and F
#
# If we want to really dumb things down, enable this to force an
# entirely serial calculation:
if 0: 
    serial = world.new_communicator([0])
    E = 0.0
    F = np.zeros((len(system), 3))
    if world.rank == 0:
        E, F = calculate({}, serial)
    E = world.sum(E)
    world.sum(F)
else:
    # Normally we'll just do it in parallel;
    # that case is covered well by other tests, so we can probably trust it
    E, F = calculate({}, world)

def check(parallel):
    return calculate(parallel, comm=world, Eref=E, Fref=F)

assert world.size in [1, 2, 4, 8], ('Number of CPUs %d not supported'
                                    % world.size)

parallel = dict(domain=1, band=1)
sl_cpus = world.size
if world.size % 2 == 0:
Beispiel #17
0
    def calculate(self):
        calc = self.calc
        focc_S = self.focc_S
        e_S = self.e_S
        op_scc = calc.wfs.kd.symmetry.op_scc

        # Get phi_qaGp
        if self.mode == 'RPA':
            self.phi_aGp = self.get_phi_aGp()
        else:
            fd = opencew('phi_qaGp')
            if fd is None:
                self.reader = Reader('phi_qaGp')
                tmp = self.load_phi_aGp(self.reader, 0)[0]
                assert len(tmp) == self.npw
                self.printtxt('Finished reading phi_aGp')
            else:
                self.printtxt('Calculating phi_qaGp')
                self.get_phi_qaGp()
                world.barrier()
                self.reader = Reader('phi_qaGp')
            self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
            self.printtxt('')

        if self.optical_limit:
            iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
        else:
            iq = np.where(
                np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]
        kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
        if self.optical_limit:
            kc_G[0] = 0.

        # Get screened Coulomb kernel
        if self.mode == 'BSE':
            try:
                # Read
                data = pickle.load(open(self.kernel_file + '.pckl'))
                W_qGG = data['W_qGG']
                assert np.shape(W_qGG) == np.shape(self.V_qGG)
                self.printtxt('Finished reading screening interaction kernel')
            except:
                # Calculate from scratch
                self.printtxt('Calculating screening interaction kernel.')
                W_qGG = self.full_static_screened_interaction()
            self.printtxt('')
        else:
            W_qGG = self.V_qGG

        t0 = time()
        self.printtxt('Calculating %s matrix elements' % self.mode)

        # Calculate full kernel
        K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
        self.rhoG0_S = np.zeros(self.nS, dtype=complex)

        #noGmap = 0
        for iS in range(self.nS_start, self.nS_end):
            k1, n1, m1 = self.Sindex_S3[iS]
            rho1_G = self.density_matrix(n1, m1, k1)
            self.rhoG0_S[iS] = rho1_G[0]
            for jS in range(self.nS):
                k2, n2, m2 = self.Sindex_S3[jS]
                rho2_G = self.density_matrix(n2, m2, k2)
                K_SS[iS - self.nS_start,
                     jS] = np.sum(rho1_G.conj() * rho2_G * kc_G)

                if not self.mode == 'RPA':
                    rho3_G = self.density_matrix(n1, n2, k1, k2)
                    rho4_G = self.density_matrix(m1, m2, self.kq_k[k1],
                                                 self.kq_k[k2])

                    q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                    q_c[np.where(q_c > 0.501)] -= 1.
                    q_c[np.where(q_c < -0.499)] += 1.
                    iq = self.kd.where_is_q(q_c, self.bzq_qc)

                    if not self.qsymm:
                        W_GG = W_qGG[iq]
                    else:
                        ibzq = self.ibzq_q[iq]
                        W_GG_tmp = W_qGG[ibzq]

                        iop = self.iop_q[iq]
                        timerev = self.timerev_q[iq]
                        diff_c = self.diff_qc[iq]
                        invop = np.linalg.inv(op_scc[iop])
                        Gindex = np.zeros(self.npw, dtype=int)
                        for iG in range(self.npw):
                            G_c = self.Gvec_Gc[iG]
                            if timerev:
                                RotG_c = -np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            else:
                                RotG_c = np.int8(
                                    np.dot(invop, G_c + diff_c).round())
                            tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                            try:
                                Gindex[iG] = np.where(tmp_G < 1e-5)[0][0]
                            except:
                                #noGmap += 1
                                Gindex[iG] = -1

                        W_GG = np.zeros_like(W_GG_tmp)
                        for iG in range(self.npw):
                            for jG in range(self.npw):
                                if Gindex[iG] == -1 or Gindex[jG] == -1:
                                    W_GG[iG, jG] = 0
                                else:
                                    W_GG[iG, jG] = W_GG_tmp[Gindex[iG],
                                                            Gindex[jG]]

                    if self.mode == 'BSE':
                        tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                        K_SS[iS - self.nS_start, jS] -= 0.5 * np.sum(tmp_GG)
                    else:
                        tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                        K_SS[iS - self.nS_start, jS] -= 0.5 * np.sum(tmp_G)
            self.timing(iS, t0, self.nS_local, 'pair orbital')

        K_SS /= self.vol

        world.sum(self.rhoG0_S)
        #self.printtxt('Number of G indices outside the Gvec_Gc: %d' % noGmap)

        # Get and solve Hamiltonian
        H_sS = np.zeros_like(K_SS)
        for iS in range(self.nS_start, self.nS_end):
            H_sS[iS - self.nS_start, iS] = e_S[iS]
            for jS in range(self.nS):
                H_sS[iS - self.nS_start,
                     jS] += focc_S[iS] * K_SS[iS - self.nS_start, jS]

        # Force matrix to be Hermitian
        if not self.coupling:
            if world.size > 1:
                H_Ss = self.redistribute_H(H_sS)
            else:
                H_Ss = H_sS
            H_sS = (np.real(H_sS) + np.real(H_Ss.T)) / 2. + 1j * (
                np.imag(H_sS) - np.imag(H_Ss.T)) / 2.

        # Save H_sS matrix
        self.par_save('H_SS', 'H_SS', H_sS)

        return H_sS
Beispiel #18
0
def main(N=73, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)
    
    if (dtype==complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1*np.tri(N, N, k= -N // nprocs) + 0.3*np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2*np.tri(N, N, k= -N // nprocs) + 0.4*np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0*np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0*np.eye(N, N, 0)
        C0 = S0.copy()

    # Local result matrices
    W0 = np.empty((N),dtype=float)
    W0_g = np.empty((N),dtype=float)

    # Calculate eigenvalues
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0) # result returned in lower triangle
        # tri2full(C0) # symmetrize
        
    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed destriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but end up with garbage on
    # on the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L') # C0 was previously overwritten

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')
    scalapack_inverse_cholesky(dist, C, 'L')

    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test-C0).max()
        ## print 'diagonalize ex err', diag_ex_err
        print 'diagonalize dc err', diag_dc_err
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print 'general diagonalize dc err', general_diag_dc_err
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print 'inverse chol err', inverse_chol_err 
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err) 
    inverse_chol_err = world.sum(inverse_chol_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
from gpaw.mpi import world
from gpaw.utilities.dscftools import mpi_debug


W = world.size
N = 32

assert N%W == 0
M = N//W

# Create my share of data
data = np.arange(world.rank*M, (world.rank+1)*M)

# Let's calculate the global sum
slocal = data.sum()
s = world.sum(slocal)
mpi_debug('data: %s, slocal=%d, s=%d' % (data,slocal,s))
assert s == N*(N-1)//2

# Subtract the global mean
data -= s/N
mpi_debug('data: %s' % data)

# -------------------------------------------------------------------

if world.rank == 0:
    print('-'*16)

# Who has global index 11? The master needs it!
i = 11
rank, ilocal = divmod(i, M)
Beispiel #20
0
    def calculate(self):
        calc = self.calc
        f_skn = self.f_skn
        e_skn = self.e_skn
        kq_k = self.kq_k
        focc_S = self.focc_S
        e_S = self.e_S
        op_scc = calc.wfs.symmetry.op_scc

        # Get phi_qaGp
        if self.mode == 'RPA':
            self.phi_aGp = self.get_phi_aGp()
        else:
            try:
                self.reader = Reader('phi_qaGp')
                tmp = self.load_phi_aGp(self.reader, 0)[0]
                assert len(tmp) == self.npw
                self.printtxt('Finished reading phi_aGp')
            except:
                self.printtxt('Calculating phi_qaGp')
                self.get_phi_qaGp()
                world.barrier()
                self.reader = Reader('phi_qaGp')                
            self.printtxt('Memory used %f M' % (maxrss() / 1024.**2))
            self.printtxt('')

        if self.optical_limit:
            iq = np.where(np.sum(abs(self.ibzq_qc), axis=1) < 1e-5)[0][0]
        else:
            iq = np.where(np.sum(abs(self.ibzq_qc - self.q_c), axis=1) < 1e-5)[0][0]
        kc_G = np.array([self.V_qGG[iq, iG, iG] for iG in range(self.npw)])
        if self.optical_limit:
            kc_G[0] = 0.

        # Get screened Coulomb kernel
        if self.mode == 'BSE':
            try:
                # Read
                data = pickle.load(open(self.kernel_file+'.pckl'))
                W_qGG = data['W_qGG']
                assert np.shape(W_qGG) == np.shape(self.V_qGG)
                self.printtxt('Finished reading screening interaction kernel')
            except:
                # Calculate from scratch
                self.printtxt('Calculating screening interaction kernel.')
                W_qGG = self.full_static_screened_interaction()
            self.printtxt('')
        else:
            W_qGG = self.V_qGG
 
        t0 = time()
        self.printtxt('Calculating %s matrix elements' % self.mode)

        # Calculate full kernel
        K_SS = np.zeros((self.nS_local, self.nS), dtype=complex)
        self.rhoG0_S = np.zeros(self.nS, dtype=complex)

        #noGmap = 0
        for iS in range(self.nS_start, self.nS_end):
            k1, n1, m1 = self.Sindex_S3[iS]
            rho1_G = self.density_matrix(n1,m1,k1)
            self.rhoG0_S[iS] = rho1_G[0]
            for jS in range(self.nS):
                k2, n2, m2 = self.Sindex_S3[jS]
                rho2_G = self.density_matrix(n2,m2,k2)
                K_SS[iS-self.nS_start, jS] = np.sum(rho1_G.conj() * rho2_G * kc_G)

                if not self.mode == 'RPA':
                    rho3_G = self.density_matrix(n1,n2,k1,k2)
                    rho4_G = self.density_matrix(m1,m2,self.kq_k[k1],
                                                 self.kq_k[k2])

                    q_c = self.kd.bzk_kc[k2] - self.kd.bzk_kc[k1]
                    q_c[np.where(q_c > 0.501)] -= 1.
                    q_c[np.where(q_c < -0.499)] += 1.
                    iq = self.kd.where_is_q(q_c, self.bzq_qc)
                    
                    if not self.qsymm:    
                        W_GG = W_qGG[iq]
                    else:
                        ibzq = self.ibzq_q[iq]
                        W_GG_tmp = W_qGG[ibzq]

                        iop = self.iop_q[iq]
                        timerev = self.timerev_q[iq]
                        diff_c = self.diff_qc[iq]
                        invop = np.linalg.inv(op_scc[iop])
                        Gindex = np.zeros(self.npw, dtype=int)
                        for iG in range(self.npw):
                            G_c = self.Gvec_Gc[iG]
                            if timerev:
                                RotG_c = -np.int8(np.dot(invop, G_c+diff_c).round())
                            else:
                                RotG_c = np.int8(np.dot(invop, G_c+diff_c).round())
                            tmp_G = np.abs(self.Gvec_Gc - RotG_c).sum(axis=1)
                            try:
                                Gindex[iG] = np.where(tmp_G < 1e-5)[0][0]
                            except:
                                #noGmap += 1
                                Gindex[iG] = -1
    
                        W_GG = np.zeros_like(W_GG_tmp)
                        for iG in range(self.npw):
                            for jG in range(self.npw):
                                if Gindex[iG] == -1 or Gindex[jG] == -1:
                                    W_GG[iG, jG] = 0
                                else:
                                    W_GG[iG, jG] = W_GG_tmp[Gindex[iG], Gindex[jG]]
                                    
                    if self.mode == 'BSE':
                        tmp_GG = np.outer(rho3_G.conj(), rho4_G) * W_GG
                        K_SS[iS-self.nS_start, jS] -= 0.5 * np.sum(tmp_GG)
                    else:
                        tmp_G = rho3_G.conj() * rho4_G * np.diag(W_GG)
                        K_SS[iS-self.nS_start, jS] -= 0.5 * np.sum(tmp_G)
            self.timing(iS, t0, self.nS_local, 'pair orbital') 
 
        K_SS /= self.vol

        world.sum(self.rhoG0_S)
        #self.printtxt('Number of G indices outside the Gvec_Gc: %d' % noGmap)

        # Get and solve Hamiltonian
        H_sS = np.zeros_like(K_SS)
        for iS in range(self.nS_start, self.nS_end):
            H_sS[iS-self.nS_start,iS] = e_S[iS]
            for jS in range(self.nS):
                H_sS[iS-self.nS_start,jS] += focc_S[iS] * K_SS[iS-self.nS_start,jS]
  
        # Force matrix to be Hermitian
        if not self.coupling:
            if world.size > 1:
                H_Ss = self.redistribute_H(H_sS)
            else:
                H_Ss = H_sS
            H_sS = (np.real(H_sS) + np.real(H_Ss.T)) / 2. + 1j * (np.imag(H_sS) - np.imag(H_Ss.T)) /2.

        # Save H_sS matrix
        self.par_save('H_SS','H_SS', H_sS)

        return H_sS
Beispiel #21
0
    def calculate(self, optical=True, ac=1.0):

        if self.spinors:
            """Calculate spinors. Here m is index of eigenvalues with SOC
            and n is the basis of eigenstates withour SOC. Below m is used
            for unoccupied states and n is used for occupied states so be
            careful!"""

            print('Diagonalizing spin-orbit Hamiltonian', file=self.fd)
            param = self.calc.parameters
            if not param['symmetry'] == 'off':
                print('Calculating KS wavefunctions without symmetry ' +
                      'for spin-orbit', file=self.fd)
                if not op.isfile('gs_nosym.gpw'):
                    calc_so = GPAW(**param)
                    calc_so.set(symmetry='off',
                                fixdensity=True,
                                txt='gs_nosym.txt')
                    calc_so.atoms = self.calc.atoms
                    calc_so.density = self.calc.density
                    calc_so.get_potential_energy()
                    calc_so.write('gs_nosym.gpw')
                calc_so = GPAW('gs_nosym.gpw', txt=None,
                               communicator=serial_comm)
                e_mk, v_knm = get_spinorbit_eigenvalues(calc_so,
                                                        return_wfs=True,
                                                        scale=self.scale)
                del calc_so
            else:
                e_mk, v_knm = get_spinorbit_eigenvalues(self.calc,
                                                        return_wfs=True,
                                                        scale=self.scale)
            e_mk /= Hartree

        # Parallelization stuff
        nK = self.kd.nbzkpts
        myKrange, myKsize, mySsize = self.parallelisation_sizes()

        # Calculate exchange interaction
        qd0 = KPointDescriptor([self.q_c])
        pd0 = PWDescriptor(self.ecut, self.calc.wfs.gd, complex, qd0)
        ikq_k = self.kd.find_k_plus_q(self.q_c)
        v_G = get_coulomb_kernel(pd0, self.kd.N_c, truncation=self.truncation,
                                 wstc=self.wstc)
        if optical:
            v_G[0] = 0.0

        self.pair = PairDensity(self.calc, self.ecut, world=serial_comm,
                                txt='pair.txt')

        # Calculate direct (screened) interaction and PAW corrections
        if self.mode == 'RPA':
            Q_aGii = self.pair.initialize_paw_corrections(pd0)
        else:
            self.get_screened_potential(ac=ac)
            if (self.qd.ibzk_kc - self.q_c < 1.0e-6).all():
                iq0 = self.qd.bz2ibz_k[self.kd.where_is_q(self.q_c,
                                                          self.qd.bzk_kc)]
                Q_aGii = self.Q_qaGii[iq0]
            else:
                Q_aGii = self.pair.initialize_paw_corrections(pd0)

        # Calculate pair densities, eigenvalues and occupations
        so = self.spinors + 1
        Nv, Nc = so * self.nv, so * self.nc
        Ns = self.spins
        rhoex_KsmnG = np.zeros((nK, Ns, Nv, Nc, len(v_G)), complex)
        # rhoG0_Ksmn = np.zeros((nK, Ns, Nv, Nc), complex)
        df_Ksmn = np.zeros((nK, Ns, Nv, Nc), float) # -(ev - ec)
        deps_ksmn = np.zeros((myKsize, Ns, Nv, Nc), float) # -(fv - fc)
        if np.allclose(self.q_c, 0.0):
            optical_limit = True
        else:
            optical_limit = False
        get_pair = self.pair.get_kpoint_pair
        get_rho = self.pair.get_pair_density
        if self.spinors:
            # Get all pair densities to allow for SOC mixing
            # Use twice as many no-SOC states as BSE bands to allow mixing
            vi_s = [2 * self.val_sn[0, 0] - self.val_sn[0, -1] - 1]
            vf_s = [2 * self.con_sn[0, -1] - self.con_sn[0, 0] + 2]
            if vi_s[0] < 0:
                vi_s[0] = 0
            ci_s, cf_s = vi_s, vf_s
            ni, nf = vi_s[0], vf_s[0]
            mvi = 2 * self.val_sn[0, 0]
            mvf = 2 * (self.val_sn[0, -1] + 1)
            mci = 2 * self.con_sn[0, 0]
            mcf = 2 * (self.con_sn[0, -1] + 1)
        else:
            vi_s, vf_s = self.val_sn[:, 0], self.val_sn[:, -1] + 1
            ci_s, cf_s = self.con_sn[:, 0], self.con_sn[:, -1] + 1
        for ik, iK in enumerate(myKrange):
            for s in range(Ns):
                pair = get_pair(pd0, s, iK,
                                vi_s[s], vf_s[s], ci_s[s], cf_s[s])
                m_m = np.arange(vi_s[s], vf_s[s])
                n_n = np.arange(ci_s[s], cf_s[s])
                if self.gw_skn is not None:
                    iKq = self.calc.wfs.kd.find_k_plus_q(self.q_c, [iK])[0]
                    epsv_m = self.gw_skn[s, iK, :self.nv]
                    epsc_n = self.gw_skn[s, iKq, self.nv:]
                    deps_ksmn[ik] = -(epsv_m[:, np.newaxis] - epsc_n)
                elif self.spinors:
                    iKq = self.calc.wfs.kd.find_k_plus_q(self.q_c, [iK])[0]
                    epsv_m = e_mk[mvi:mvf, iK]
                    epsc_n = e_mk[mci:mcf, iKq]
                    deps_ksmn[ik, s] = -(epsv_m[:, np.newaxis] - epsc_n)
                else:
                    deps_ksmn[ik, s] = -pair.get_transition_energies(m_m, n_n)

                df_mn = pair.get_occupation_differences(self.val_sn[s],
                                                        self.con_sn[s])
                rho_mnG = get_rho(pd0, pair,
                                  m_m, n_n,
                                  optical_limit=optical_limit,
                                  direction=self.direction,
                                  Q_aGii=Q_aGii,
                                  extend_head=False)
                if self.spinors:
                    if optical_limit:
                        deps0_mn = -pair.get_transition_energies(m_m, n_n)
                        rho_mnG[:, :, 0] *= deps0_mn
                    df_Ksmn[iK, s, ::2, ::2] = df_mn
                    df_Ksmn[iK, s, ::2, 1::2] = df_mn
                    df_Ksmn[iK, s, 1::2, ::2] = df_mn
                    df_Ksmn[iK, s, 1::2, 1::2] = df_mn
                    vecv0_nm = v_knm[iK][::2][ni:nf, mvi:mvf]
                    vecc0_nm = v_knm[iKq][::2][ni:nf, mci:mcf]
                    rho_0mnG = np.dot(vecv0_nm.T.conj(),
                                      np.dot(vecc0_nm.T, rho_mnG))
                    vecv1_nm = v_knm[iK][1::2][ni:nf, mvi:mvf]
                    vecc1_nm = v_knm[iKq][1::2][ni:nf, mci:mcf]
                    rho_1mnG = np.dot(vecv1_nm.T.conj(),
                                      np.dot(vecc1_nm.T, rho_mnG))
                    rhoex_KsmnG[iK, s] = rho_0mnG + rho_1mnG
                    if optical_limit:
                        rhoex_KsmnG[iK, s, :, :, 0] /= deps_ksmn[ik, s]
                else:
                    df_Ksmn[iK, s] = pair.get_occupation_differences(m_m, n_n)
                    rhoex_KsmnG[iK, s] = rho_mnG

        if self.eshift is not None:
            deps_ksmn[np.where(df_Ksmn[myKrange] > 1.0e-3)] += self.eshift
            deps_ksmn[np.where(df_Ksmn[myKrange] < -1.0e-3)] -= self.eshift

        world.sum(df_Ksmn)
        world.sum(rhoex_KsmnG)

        self.rhoG0_S = np.reshape(rhoex_KsmnG[:, :, :, :, 0], -1)

        if hasattr(self, 'H_sS'):
            return

        # Calculate Hamiltonian
        t0 = time()
        print('Calculating %s matrix elements at q_c = %s'
              % (self.mode, self.q_c), file=self.fd)
        H_ksmnKsmn = np.zeros((myKsize, Ns, Nv, Nc, nK, Ns, Nv, Nc), complex)
        for ik1, iK1 in enumerate(myKrange):
            for s1 in range(Ns):
                kptv1 = self.pair.get_k_point(s1, iK1, vi_s[s1], vf_s[s1])
                kptc1 = self.pair.get_k_point(s1, ikq_k[iK1], ci_s[s1],
                                              cf_s[s1])
                rho1_mnG = rhoex_KsmnG[iK1, s1]

                #rhoG0_Ksmn[iK1, s1] = rho1_mnG[:, :, 0]
                rho1ccV_mnG = rho1_mnG.conj()[:, :] * v_G
                for s2 in range(Ns):
                    for Q_c in self.qd.bzk_kc:
                        iK2 = self.kd.find_k_plus_q(Q_c, [kptv1.K])[0]
                        rho2_mnG = rhoex_KsmnG[iK2, s2]
                        rho2_mGn = np.swapaxes(rho2_mnG, 1, 2)
                        H_ksmnKsmn[ik1, s1, :, :, iK2, s2, :, :] += (
                            np.dot(rho1ccV_mnG, rho2_mGn))
                        if not self.mode == 'RPA' and s1 == s2:
                            ikq = ikq_k[iK2]
                            kptv2 = self.pair.get_k_point(s1, iK2, vi_s[s1],
                                                          vf_s[s1])
                            kptc2 = self.pair.get_k_point(s1, ikq, ci_s[s1],
                                                          cf_s[s1])
                            rho3_mmG, iq = self.get_density_matrix(kptv1,
                                                                   kptv2)
                            rho4_nnG, iq = self.get_density_matrix(kptc1,
                                                                   kptc2)
                            if self.spinors:
                                vec0_nm = v_knm[iK1][::2][ni:nf, mvi:mvf]
                                vec1_nm = v_knm[iK1][1::2][ni:nf, mvi:mvf]
                                vec2_nm = v_knm[iK2][::2][ni:nf, mvi:mvf]
                                vec3_nm = v_knm[iK2][1::2][ni:nf, mvi:mvf]
                                rho_0mnG = np.dot(vec0_nm.T.conj(),
                                                  np.dot(vec2_nm.T, rho3_mmG))
                                rho_1mnG = np.dot(vec1_nm.T.conj(),
                                                  np.dot(vec3_nm.T, rho3_mmG))
                                rho3_mmG = rho_0mnG + rho_1mnG
                                vec0_nm = v_knm[ikq_k[iK1]][::2][ni:nf, mci:mcf]
                                vec1_nm = v_knm[ikq_k[iK1]][1::2][ni:nf,mci:mcf]
                                vec2_nm = v_knm[ikq][::2][ni:nf, mci:mcf]
                                vec3_nm = v_knm[ikq][1::2][ni:nf, mci:mcf]
                                rho_0mnG = np.dot(vec0_nm.T.conj(),
                                                  np.dot(vec2_nm.T, rho4_nnG))
                                rho_1mnG = np.dot(vec1_nm.T.conj(),
                                                  np.dot(vec3_nm.T, rho4_nnG))
                                rho4_nnG = rho_0mnG + rho_1mnG

                            rho3ccW_mmG = np.dot(rho3_mmG.conj(),
                                                 self.W_qGG[iq])
                            W_mmnn = np.dot(rho3ccW_mmG,
                                            np.swapaxes(rho4_nnG, 1, 2))
                            W_mnmn = np.swapaxes(W_mmnn, 1, 2) * Ns * so
                            H_ksmnKsmn[ik1, s1, :, :, iK2, s1] -= 0.5 * W_mnmn
            if iK1 % (myKsize // 5 + 1) == 0:
                dt = time() - t0
                tleft = dt * myKsize / (iK1 + 1) - dt
                print('  Finished %s pair orbitals in %s - Estimated %s left' %
                      ((iK1 + 1) * Nv * Nc * Ns * world.size,
                       timedelta(seconds=round(dt)),
                       timedelta(seconds=round(tleft))), file=self.fd)

        #if self.mode == 'BSE':
        #    del self.Q_qaGii, self.W_qGG, self.pd_q

        H_ksmnKsmn /= self.vol

        mySsize = myKsize * Nv * Nc * Ns
        if myKsize > 0:
            iS0 = myKrange[0] *  Nv * Nc * Ns

        #world.sum(rhoG0_Ksmn)
        #self.rhoG0_S = np.reshape(rhoG0_Ksmn, -1)
        self.df_S = np.reshape(df_Ksmn, -1)
        if not self.td:
            self.excludef_S = np.where(np.abs(self.df_S) < 0.001)[0]
        # multiply by 2 when spin-paired and no SOC
        self.df_S *= 2.0 / nK / Ns / so
        self.deps_s = np.reshape(deps_ksmn, -1)
        H_sS = np.reshape(H_ksmnKsmn, (mySsize, self.nS))
        for iS in range(mySsize):
            # Multiply by occupations and adiabatic coupling
            H_sS[iS] *= self.df_S[iS0 + iS] * ac
            # add bare transition energies
            H_sS[iS, iS0 + iS] += self.deps_s[iS]

        self.H_sS = H_sS

        if self.write_h:
            self.par_save('H_SS.ulm', 'H_SS', self.H_sS)
Beispiel #22
0
from ase.structure import molecule
from gpaw import GPAW
from gpaw.wavefunctions.pw import PW
from gpaw.mpi import world

a = molecule('H', pbc=1)
a.center(vacuum=2)

comm = world.new_communicator([world.rank])
e0 = 0.0
a.calc = GPAW(mode=PW(250),
              communicator=comm,
              txt=None)
e0 = a.get_potential_energy()
e0 = world.sum(e0) / world.size

a.calc = GPAW(mode=PW(250),
              eigensolver='rmm-diis',
              basis='szp(dzp)',
              txt='%d.txt' % world.size)
e = a.get_potential_energy()
f = a.get_forces()
assert abs(e - e0) < 7e-5, abs(e - e0)
assert abs(f).max() < 1e-10, abs(f).max()

Beispiel #23
0
def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol,
                  bcell_cv, R_av, setups, D_asp):
    """LDA kernel"""

    # The soft part
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    xc = XC('LDA')
    
    fxc_sg = np.zeros_like(nt_sG)
    xc.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_g = np.fft.fftn(fxc_g) * vol / nG0

    r_vg = gd.get_grid_point_coordinates()
    
    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            if (nG / 2 - np.abs(dG_c) > 0).all():
                index = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = tmp_g[index[0], index[1], index[2]]
            else: # not in the fft index
                dG_v = np.dot(dG_c, bcell_cv)
                dGr_g = gemmdot(dG_v, r_vg, beta=0.0) 
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j*dGr_g)*fxc_g)

    KxcPAW_GG = np.zeros_like(Kxc_GG)
    # The PAW part
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] =  np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g
        
            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)
            assert nspins == 1
            
            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)
        
            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)
            # Add core density
            n_sLg[:, 0] += sqrt(4 * pi) / nspins * nc_g
            nt_sLg[:, 0] += sqrt(4 * pi) / nspins * nct_g
            
            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
        
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                xc.calculate_fxc(rgd, n_sg, f_sg)
                
                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                xc.calculate_fxc(rgd, nt_sg, ft_sg)
        
                coef_GGg = np.exp(-1j * np.outer(np.inner(dG_GGv, R_nv[n]),
                                                 rgd.r_g)).reshape(npw,npw,rgd.ng)
                KxcPAW_GG += w * np.dot(coef_GGg, (f_sg[0]-ft_sg[0]) * dv_g) * coefatoms_GG
    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG
    
    return Kxc_GG / vol
def calculate_Kxc(pd, nt_sG, R_av, setups, D_asp, functional='ALDA',
                  density_cut=None):
    """ALDA kernel"""

    gd = pd.gd
    npw = pd.ngmax
    nG = pd.gd.N_c
    vol = pd.gd.volume
    bcell_cv = np.linalg.inv(pd.gd.cell_cv)
    G_Gv = pd.get_reciprocal_vectors()
    
    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG, iQ in enumerate(pd.Q_qG[0]):
            iQ_c = (np.unravel_index(iQ, nG) + nG // 2) % nG - nG // 2
            for jG, jQ in enumerate(pd.Q_qG[0]):
                jQ_c = (np.unravel_index(jQ, nG) + nG // 2) % nG - nG // 2
                ijQ_c = (iQ_c - jQ_c)
                if (abs(ijQ_c) < nG // 2).all():
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][tuple(ijQ_c)]

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for v in range(3):
        dG_GGv[:, :, v] = np.subtract.outer(G_Gv[:, v], G_Gv[:, v])

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x
                                                 * (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(-1j * np.inner(dG_GGv, R_nv[n])
                                     * rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] -
                                                     ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol
Beispiel #25
0
    def calculate_exx(self):
        """Non-selfconsistent calculation."""

        kd = self.kd
        K = self.fullkd.nibzkpts
        assert self.nspins == 1
        Q = K // world.size
        assert Q * world.size == K
        parallel = (world.size > self.nspins)

        self.exx = 0.0
        self.exx_skn = np.zeros((self.nspins, K, self.bd.nbands))

        kpt_u = []
        for k in range(world.rank * Q, (world.rank + 1) * Q):
            k_c = self.fullkd.ibzk_kc[k]
            for k1, k1_c in enumerate(kd.bzk_kc):
                if abs(k1_c - k_c).max() < 1e-10:
                    break

            # Index of symmetry related point in the irreducible BZ
            ik = kd.kibz_k[k1]
            kpt = self.kpt_u[ik]

            # KPoint from ground-state calculation
            phase_cd = np.exp(2j * pi * self.gd.sdisp_cd * k_c[:, np.newaxis])
            kpt2 = KPoint0(kpt.weight, kpt.s, k, None, phase_cd)
            kpt2.psit_nG = np.empty_like(kpt.psit_nG)
            kpt2.f_n = kpt.f_n / kpt.weight / K * 2
            for n, psit_G in enumerate(kpt2.psit_nG):
                psit_G[:] = kd.transform_wave_function(kpt.psit_nG[n], k1)

            kpt2.P_ani = self.pt.dict(len(kpt.psit_nG))
            self.pt.integrate(kpt2.psit_nG, kpt2.P_ani, k)
            kpt_u.append(kpt2)

        for s in range(self.nspins):
            kpt1_q = [KPoint(self.fullkd, kpt) for kpt in kpt_u if kpt.s == s]
            kpt2_q = kpt1_q[:]

            if len(kpt1_q) == 0:
                # No s-spins on this CPU:
                continue

            # Send rank:
            srank = self.fullkd.get_rank_and_index(s, (kpt1_q[0].k - 1) % K)[0]

            # Receive rank:
            rrank = self.fullkd.get_rank_and_index(s,
                                                   (kpt1_q[-1].k + 1) % K)[0]

            # Shift k-points K // 2 times:
            for i in range(K // 2 + 1):
                if i < K // 2:
                    if parallel:
                        kpt = kpt2_q[-1].next()
                        kpt.start_receiving(rrank)
                        kpt2_q[0].start_sending(srank)
                    else:
                        kpt = kpt2_q[0]

                for kpt1, kpt2 in zip(kpt1_q, kpt2_q):
                    if 2 * i == K:
                        self.apply(kpt1, kpt2, invert=(kpt1.k > kpt2.k))
                    else:
                        self.apply(kpt1, kpt2)
                        self.apply(kpt1, kpt2, invert=True)

                if i < K // 2:
                    if parallel:
                        kpt.wait()
                        kpt2_q[0].wait()
                    kpt2_q.pop(0)
                    kpt2_q.append(kpt)

        self.exx = world.sum(self.exx)
        world.sum(self.exx_skn)
        self.exx += self.calculate_paw_correction()
Beispiel #26
0
from ase.build import molecule
from gpaw import GPAW
from gpaw.wavefunctions.pw import PW
from gpaw.mpi import world

a = molecule('H', pbc=1)
a.center(vacuum=2)

comm = world.new_communicator([world.rank])
e0 = 0.0
a.calc = GPAW(mode=PW(250), communicator=comm, txt=None)
e0 = a.get_potential_energy()
e0 = world.sum(e0) / world.size

a.calc = GPAW(mode=PW(250),
              eigensolver='rmm-diis',
              basis='szp(dzp)',
              txt='%d.txt' % world.size)
e = a.get_potential_energy()
f = a.get_forces()
assert abs(e - e0) < 7e-5, abs(e - e0)
assert abs(f).max() < 1e-10, abs(f).max()
Beispiel #27
0
from gpaw.atom.configurations import parameters
from gpaw import setup_paths
from gpaw.test import equal
from gpaw.mpi import world

atom = 'Ne'
setup_paths.insert(0, '.')

for xcname in ['GLLBSC', 'GLLB']:
    if world.rank == 0:
        g = Generator(atom, xcname=xcname, scalarrel=False, nofiles=True)
        g.run(**parameters[atom])
        eps = g.e_j[-1]
    else:
        eps = 0.0
    eps = world.sum(eps)
    world.barrier()

    a = 5
    Ne = Atoms([Atom(atom, (0, 0, 0))], cell=(a, a, a), pbc=False)
    Ne.center()
    calc = GPAW(nbands=7, h=0.25, xc=xcname)
    Ne.set_calculator(calc)
    e = Ne.get_potential_energy()
    # Calculate the discontinuity
    response = calc.hamiltonian.xc.xcs['RESPONSE']
    response.calculate_delta_xc()
    response.calculate_delta_xc_perturbation()

    eps3d = calc.wfs.kpt_u[0].eps_n[3]
    #if world.rank == 0:
Beispiel #28
0
def calculate_Kxc(gd,
                  nt_sG,
                  npw,
                  Gvec_Gc,
                  nG,
                  vol,
                  bcell_cv,
                  R_av,
                  setups,
                  D_asp,
                  functional='ALDA',
                  density_cut=None):
    """ALDA kernel"""

    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0],
                                                   index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(np.exp(-1j * dGr_g)
                                                      * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x
                                                 * (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(-1j * np.inner(dG_GGv, R_nv[n])
                                     * rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] -
                                                     ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol
Beispiel #29
0
def calculate_Kxc(gd,
                  nt_sG,
                  npw,
                  Gvec_Gc,
                  nG,
                  vol,
                  bcell_cv,
                  R_av,
                  setups,
                  D_asp,
                  functional='ALDA',
                  density_cut=None):
    """ALDA kernel"""

    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0], index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(
                        np.exp(-1j * dGr_g) * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x *
                                                 (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(-1j * np.inner(dG_GGv, R_nv[n]) *
                                     rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] - ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol
Beispiel #30
0
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 hermitean
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use upper diagonal
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed destriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
        print('hemm_err', hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
Beispiel #31
0
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)
    
    if (dtype==complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)
    
    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype) # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype) # zeros needed for rank-updates

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        #gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print A0.shape, Z0.shape
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed destriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype) # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype) # zeros needed for rank-updates

    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype) # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype) # zeros needed for rank-updates
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err  = abs(S1 - S0).max()
        rk_err   = abs(U1 - U0).max()
        print 'gemm err', gemm_err
        print 'gemv err', gemv_err
        print 'r2k err' , r2k_err
        print 'rk_err'  , rk_err
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err  = 0.0
        rk_err   = 0.0

    gemm_err = world.sum(gemm_err) # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err  = world.sum(r2k_err)
    rk_err   = world.sum(rk_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err,0, tol)
Beispiel #32
0
        Ferr = np.abs(F - Fref).max()
        assert Ferr < 1e-6, 'Bad F: err=%f; parallel=%s' % (Ferr, parallel)
    return E, F


# First calculate reference energy and forces E and F
#
# If we want to really dumb things down, enable this to force an
# entirely serial calculation:
if 0:
    serial = world.new_communicator([0])
    E = 0.0
    F = np.zeros((len(system), 3))
    if world.rank == 0:
        E, F = calculate({}, serial)
    E = world.sum(E)
    world.sum(F)
else:
    # Normally we'll just do it in parallel;
    # that case is covered well by other tests, so we can probably trust it
    E, F = calculate({}, world)


def check(parallel):
    return calculate(parallel, comm=world, Eref=E, Fref=F)


assert world.size in [1, 2, 4,
                      8], ('Number of CPUs %d not supported' % world.size)

parallel = dict(domain=1, band=1)
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 hermitean

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use upper diagonal
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed destriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa="T")
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo="L", side="L")

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print("gemm err", gemm_err)
        print("gemv err", gemv_err)
        print("r2k err", r2k_err)
        print("rk_err", rk_err)
        print("hemm_err", hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)