Example #1
    def __init__(self, sl_lrtddft, nrows, lr_comms):
        self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)

        self.lr_comms = lr_comms

        # original grid, i.e., how the matrix is stored
        self.matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                     self.lr_comms.dd_comm.size,
                                     self.lr_comms.eh_comm.size)

        # diagonalization grid
        self.diag_grid = BlacsGrid(self.lr_comms.parent_comm, self.mprocs,
                                   self.nprocs)

        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        #
        # M = rows, N = cols
        M = nrows
        N = nrows
        mb = 1
        nb = 1
        self.matrix_descr = self.matrix_grid.new_descriptor(N, M, nb, mb)

        bs = self.block_size
        self.diag_descr = self.diag_grid.new_descriptor(N, M, bs, bs)

        self.diag_in_redist = Redistributor(self.lr_comms.parent_comm,
                                            self.matrix_descr, self.diag_descr)

        self.diag_out_redist = Redistributor(self.lr_comms.parent_comm,
                                             self.diag_descr,
                                             self.matrix_descr)
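
Taken together, the two redistributors implement a round trip: the matrix lives row-striped on the storage grid, hops to the dense two-dimensional grid for the ScaLAPACK solver, and the eigenvectors hop back. A minimal standalone sketch of that pattern (sizes and grid shape are illustrative; ranks outside the diagonalization grid simply hold zero-size blocks):

import numpy as np
from gpaw.blacs import BlacsGrid, Redistributor
from gpaw.mpi import world

N = 64
store_grid = BlacsGrid(world, world.size, 1)            # row-striped storage
diag_grid = BlacsGrid(world, max(world.size // 2, 1), min(world.size, 2))
store_desc = store_grid.new_descriptor(N, N, -(-N // world.size), N)
diag_desc = diag_grid.new_descriptor(N, N, 8, 8)        # blocked for ScaLAPACK

A_store = store_desc.zeros(dtype=float)                 # fill with real data here
A_diag = diag_desc.empty(dtype=float)
Redistributor(world, store_desc, diag_desc).redistribute(A_store, A_diag)

C_diag = diag_desc.zeros(dtype=float)
w_N = np.zeros(N)                                       # eigenvalues
diag_desc.diagonalize_dc(A_diag, C_diag, w_N, 'L')

C_store = store_desc.empty(dtype=float)
Redistributor(world, diag_desc, store_desc).redistribute(C_diag, C_store)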
Example #2
    def scalapack_diagonalize(self, H_sS):

        mb = 32
        N = self.nS

        g1 = BlacsGrid(world, size, 1)
        g2 = BlacsGrid(world, size // 2, 2)
        nndesc1 = g1.new_descriptor(N, N, self.nS_local, N)
        nndesc2 = g2.new_descriptor(N, N, mb, mb)

        A_ss = nndesc2.empty(dtype=H_sS.dtype)
        redistributor = Redistributor(world, nndesc1, nndesc2)
        redistributor.redistribute(H_sS, A_ss)

        # diagonalize
        v_ss = nndesc2.zeros(dtype=A_ss.dtype)
        w_S = np.zeros(N, dtype=float)
        nndesc2.diagonalize_dc(A_ss, v_ss, w_S, 'L')

        # redistribute the eigenvectors back to the row-striped layout
        v_sS = np.zeros_like(H_sS)
        redistributor = Redistributor(world, nndesc2, nndesc1)
        redistributor.redistribute(v_ss, v_sS)

        #        v2_SS = np.zeros((self.nS, self.nS), dtype=complex)
        #        world.all_gather(v_sS, v2_SS)

        return w_S, v_sS.conj()
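
Here self.nS_local is assumed to follow the ceil-division convention used throughout these examples (compare mynw and mynG in Example #6): every rank owns the same maximal number of rows, and trailing ranks own fewer, possibly zero. As a one-line sketch:

from gpaw.mpi import world

nS = 1000                          # illustrative global size
nS_local = -(-nS // world.size)    # == ceil(nS / world.size)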
Example #3
    def __init__(self,
                 gd,
                 bd,
                 block_comm,
                 dtype,
                 mcpus,
                 ncpus,
                 blocksize,
                 nao,
                 timer=nulltimer):
        BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                              blocksize, timer)
        nbands = bd.nbands
        self.blocksize = blocksize
        self.mynbands = mynbands = bd.mynbands

        self.orbital_comm = self.bd.comm
        self.naoblocksize = naoblocksize = -((-nao) // self.orbital_comm.size)
        self.nao = nao

        # Range of basis functions for BLACS distribution of matrices:
        self.Mmax = nao
        self.Mstart = bd.comm.rank * naoblocksize
        self.Mstop = min(self.Mstart + naoblocksize, self.Mmax)
        self.mynao = self.Mstop - self.Mstart

        # Column layout for one matrix per band rank:
        self.columngrid = BlacsGrid(bd.comm, bd.comm.size, 1)
        self.mMdescriptor = self.columngrid.new_descriptor(
            nao, nao, naoblocksize, nao)
        self.nMdescriptor = self.columngrid.new_descriptor(
            nbands, nao, mynbands, nao)

        #parallelprint(world, (mynao, self.mMdescriptor.shape))

        # Column layout for one matrix in total (only on grid masters):
        self.single_column_grid = BlacsGrid(self.column_comm, bd.comm.size, 1)
        self.mM_unique_descriptor = self.single_column_grid.new_descriptor(
            nao, nao, naoblocksize, nao)

        # nM_unique_descriptor is meant to hold the coefficients after
        # diagonalization.  BLACS requires it to be nao-by-nao, but
        # we only fill meaningful data into the first nbands columns.
        #
        # The array will then be trimmed and broadcast across
        # the grid descriptor's communicator.
        self.nM_unique_descriptor = self.single_column_grid.new_descriptor(
            nbands, nao, mynbands, nao)

        # Fully blocked grid for diagonalization with many CPUs:
        self.mmdescriptor = self.blockgrid.new_descriptor(
            nao, nao, blocksize, blocksize)

        #self.nMdescriptor = nMdescriptor
        self.mM2mm = Redistributor(self.block_comm, self.mM_unique_descriptor,
                                   self.mmdescriptor)
        self.mm2nM = Redistributor(self.block_comm, self.mmdescriptor,
                                   self.nM_unique_descriptor)
Example #4
    def redistribute_H(self, H_sS):

        g1 = BlacsGrid(world, size, 1)
        g2 = BlacsGrid(world, 1, size)
        N = self.nS
        nndesc1 = g1.new_descriptor(N, N, self.nS_local, N)
        nndesc2 = g2.new_descriptor(N, N, N, self.nS_local)

        H_Ss = nndesc2.empty(dtype=H_sS.dtype)
        redistributor = Redistributor(world, nndesc1, nndesc2)
        redistributor.redistribute(H_sS, H_Ss)

        return H_Ss
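
Because g1 stripes the global matrix over rows and g2 stripes it over columns, this single redistribute turns the row-distributed H_sS (local shape (nS_local, nS)) into a column-distributed H_Ss (local shape (nS, nS_local)) without any explicit transpose. A standalone sketch of the same layout switch, with illustrative sizes:

from gpaw.blacs import BlacsGrid, Redistributor
from gpaw.mpi import world

N = 8                                   # illustrative global size
n_local = -(-N // world.size)           # rows (or columns) per rank
g1 = BlacsGrid(world, world.size, 1)    # row stripes
g2 = BlacsGrid(world, 1, world.size)    # column stripes
row_desc = g1.new_descriptor(N, N, n_local, N)
col_desc = g2.new_descriptor(N, N, N, n_local)

H_sS = row_desc.zeros(dtype=complex)    # each rank: n_local full rows
H_Ss = col_desc.empty(dtype=complex)    # each rank: n_local full columns
Redistributor(world, row_desc, col_desc).redistribute(H_sS, H_Ss)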
Example #5
    def diagonalize(self):
        """The t and T represent local and global
        eigenstate indices, respectively."""

        print('Diagonalizing Hamiltonian', file=self.fd)

        # A non-Hermitian matrix can only be diagonalized with numpy.linalg.eig
        if not self.td:
            print('  Using numpy.linalg.eig...', file=self.fd)
            print('  Eliminated %s pair orbitals' % len(self.excludef_S),
                  file=self.fd)

            self.H_SS = self.collect_A_SS(self.H_sS)
            self.w_T = np.zeros(self.nS - len(self.excludef_S), complex)
            if world.rank == 0:
                self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=0)
                self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=1)
                self.w_T, self.v_ST = np.linalg.eig(self.H_SS)
            world.broadcast(self.w_T, 0)
            self.df_S = np.delete(self.df_S, self.excludef_S)
            self.rhoG0_S = np.delete(self.rhoG0_S, self.excludef_S)
        # Here the eigenvectors are returned as complex conjugated rows
        else:
            if world.size == 1:
                print('  Using lapack...', file=self.fd)
                from gpaw.utilities.lapack import diagonalize
                self.w_T = np.zeros(self.nS)
                diagonalize(self.H_sS, self.w_T)
                self.v_St = self.H_sS.conj().T
            else:
                print('  Using scalapack...', file=self.fd)
                nS = self.nS
                ns = -(-self.kd.nbzkpts // world.size) * (self.nv * self.nc *
                                                          self.spins *
                                                          (self.spinors + 1)**2)
                grid = BlacsGrid(world, world.size, 1)
                desc = grid.new_descriptor(nS, nS, ns, nS)

                desc2 = grid.new_descriptor(nS, nS, 2, 2)
                H_tmp = desc2.zeros(dtype=complex)
                r = Redistributor(world, desc, desc2)
                r.redistribute(self.H_sS, H_tmp)

                self.w_T = np.empty(nS)
                v_tmp = desc2.empty(dtype=complex)
                desc2.diagonalize_dc(H_tmp, v_tmp, self.w_T)

                r = Redistributor(grid.comm, desc2, desc)
                self.v_St = desc.zeros(dtype=complex)
                r.redistribute(v_tmp, self.v_St)
                self.v_St = self.v_St.conj().T

        if self.write_v and self.td:
            # Cannot use par_save without td
            self.par_save('v_TS.ulm', 'v_TS', self.v_St.T)

        return
Example #6
    def redistribute(self, in_wGG, out_x=None):
        """Redistribute array.
        
        Switch between two kinds of parallel distributions:
            
        1) parallel over G-vectors (second dimension of in_wGG)
        2) parallel over frequency (first dimension of in_wGG)

        Returns new array using the memory in the 1-d array out_x.
        """

        comm = self.blockcomm

        if comm.size == 1:
            return in_wGG

        nw = len(self.omega_w)
        nG = in_wGG.shape[2]
        mynw = (nw + comm.size - 1) // comm.size
        mynG = (nG + comm.size - 1) // comm.size

        bg1 = BlacsGrid(comm, comm.size, 1)
        bg2 = BlacsGrid(comm, 1, comm.size)
        md1 = BlacsDescriptor(bg1, nw, nG**2, mynw, nG**2)
        md2 = BlacsDescriptor(bg2, nw, nG**2, nw, mynG * nG)

        if len(in_wGG) == nw:
            mdin = md2
            mdout = md1
        else:
            mdin = md1
            mdout = md2

        r = Redistributor(comm, mdin, mdout)

        outshape = (mdout.shape[0], mdout.shape[1] // nG, nG)
        if out_x is None:
            out_wGG = np.empty(outshape, complex)
        else:
            out_wGG = out_x[:np.prod(outshape)].reshape(outshape)

        r.redistribute(in_wGG.reshape(mdin.shape),
                       out_wGG.reshape(mdout.shape))

        return out_wGG
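
Since the input layout is inferred from len(in_wGG), calling the method twice toggles between the two distributions. A hedged usage sketch, where self stands for the response-function object that owns blockcomm and omega_w:

# chi0_wGG starts parallel over frequencies: local shape (mynw, nG, nG)
chi0_WgG = self.redistribute(chi0_wGG)   # now parallel over G: (nw, mynG, nG)
chi0_wGG = self.redistribute(chi0_WgG)   # back to (mynw, nG, nG)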
Example #7
def test(comm, M, N, mcpus, ncpus, mb, nb):
    grid0 = BlacsGrid(comm, 1, 1)
    desc0 = grid0.new_descriptor(M, N, M, N, 0, 0)
    A_mn = desc0.zeros(dtype=float)
    A_mn[:] = comm.size + 1

    grid1 = BlacsGrid(comm, mcpus, ncpus)
    desc1 = grid1.new_descriptor(M, N, mb, nb, 0, 0)  # ???
    B_mn = desc1.zeros(dtype=float)
    B_mn[:] = comm.rank

    if comm.rank == 0:
        msg = 'Slices of global matrix indices by rank'
        print(msg)
        print('-' * len(msg))

    for rank in range(comm.size):
        comm.barrier()
        if rank == comm.rank:
            print('Rank %d:' % rank)
            last_Mstart = -1
            for Mstart, Mstop, Nstart, Nstop, block in desc1.my_blocks(B_mn):
                if Mstart > last_Mstart and last_Mstart >= 0:
                    print()
                print('[%3d:%3d, %3d:%3d]' % (Mstart, Mstop, Nstart, Nstop),
                      end=' ')
                last_Mstart = Mstart
                assert (block == comm.rank).all()
                #print block
                #print
            print()
            print()
        comm.barrier()

    redistributor = Redistributor(comm, desc1, desc0)
    redistributor.redistribute(B_mn, A_mn)

    if comm.rank == 0:
        msg = 'Rank where each element of the global matrix is stored'
        print(msg)
        print('-' * len(msg))
        print(A_mn)
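
A driver sketch for this test, with illustrative sizes; it assumes four MPI ranks so that the 2 x 2 process grid fills the communicator:

from gpaw.mpi import world

if __name__ == '__main__':
    # run as, e.g.: mpirun -np 4 python this_script.py
    test(world, M=16, N=12, mcpus=2, ncpus=2, mb=3, nb=4)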
Example #8
    def __init__(self,
                 gd,
                 bd,
                 block_comm,
                 dtype,
                 mcpus,
                 ncpus,
                 blocksize,
                 buffer_size=None,
                 timer=nulltimer):
        BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                              blocksize, timer)
        self.buffer_size = buffer_size
        nbands = bd.nbands
        self.mynbands = mynbands = bd.mynbands
        self.blocksize = blocksize

        # 1D layout - columns
        self.columngrid = BlacsGrid(self.column_comm, 1, bd.comm.size)
        self.Nndescriptor = self.columngrid.new_descriptor(
            nbands, nbands, nbands, mynbands)

        # 2D layout
        self.nndescriptor = self.blockgrid.new_descriptor(
            nbands, nbands, blocksize, blocksize)

        # 1D layout - rows
        self.rowgrid = BlacsGrid(self.column_comm, bd.comm.size, 1)
        self.nNdescriptor = self.rowgrid.new_descriptor(
            nbands, nbands, mynbands, nbands)

        # Only redistribute filled out half for Hermitian matrices
        self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
                                   self.nndescriptor)
        #self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
        #                           self.nndescriptor, 'L') #XXX faster but...

        # The resulting matrix will be used in dgemm, which is symmetry-oblivious
        self.nn2nN = Redistributor(self.block_comm, self.nndescriptor,
                                   self.nNdescriptor)
Example #9
    def distribute_frequencies(self, chi0_wGG):
        """Distribute frequencies to all cores."""

        world = self.world
        comm = self.blockcomm

        if world.size == 1:
            return chi0_wGG

        nw = len(self.omega_w)
        nG = chi0_wGG.shape[2]
        mynw = (nw + world.size - 1) // world.size
        mynG = (nG + comm.size - 1) // comm.size

        wa = min(world.rank * mynw, nw)
        wb = min(wa + mynw, nw)

        if self.blockcomm.size == 1:
            return chi0_wGG[wa:wb].copy()

        if self.kncomm.rank == 0:
            bg1 = BlacsGrid(comm, 1, comm.size)
            in_wGG = chi0_wGG.reshape((nw, -1))
        else:
            bg1 = DryRunBlacsGrid(mpi.serial_comm, 1, 1)
            in_wGG = np.zeros((0, 0), complex)
        md1 = BlacsDescriptor(bg1, nw, nG**2, nw, mynG * nG)

        bg2 = BlacsGrid(world, world.size, 1)
        md2 = BlacsDescriptor(bg2, nw, nG**2, mynw, nG**2)

        r = Redistributor(world, md1, md2)
        shape = (wb - wa, nG, nG)
        out_wGG = np.empty(shape, complex)
        r.redistribute(in_wGG, out_wGG.reshape((wb - wa, nG**2)))

        return out_wGG
Example #10
    def __init__(self,
                 gd,
                 bd,
                 block_comm,
                 dtype,
                 mcpus,
                 ncpus,
                 blocksize,
                 timer=nulltimer):
        KohnShamLayouts.__init__(self, gd, bd, block_comm, dtype, timer)
        # WARNING: Do not create the BlacsGrid on a communicator which does
        # not contain block_comm.rank = 0. This will break BlacsBandLayouts,
        # which assume eps_M will be broadcast over block_comm.
        self.blocksize = blocksize
        self.blockgrid = BlacsGrid(self.block_comm, mcpus, ncpus)
Example #11
def parallel_eigh(matrixfile, blacsgrid=(4, 2), blocksize=64):
    """Diagonalize matrix in parallel"""
    assert np.prod(blacsgrid) == world.size
    grid = BlacsGrid(world, *blacsgrid)

    if world.rank == MASTER:
        H_MM = np.load(matrixfile)
        assert H_MM.ndim == 2
        assert H_MM.shape[0] == H_MM.shape[1]
        NM = len(H_MM)
    else:
        NM = 0
    NM = world.sum(NM) # Distribute matrix shape to all nodes

    # descriptor for the individual blocks
    block_desc = grid.new_descriptor(NM, NM, blocksize, blocksize)

    # descriptor for global array on MASTER
    local_desc = grid.new_descriptor(NM, NM, NM, NM)

    # Make some dummy array on all the slaves
    if world.rank != MASTER:
        H_MM = local_desc.zeros()
    assert local_desc.check(H_MM)

    # The local version of the matrix
    H_mm = block_desc.empty()

    # Distribute global array to smaller blocks
    redistributor = Redistributor(world, local_desc, block_desc)
    redistributor.redistribute(H_MM, H_mm)

    # Allocate arrays for eigenvalues and -vectors
    eps_M = np.empty(NM)
    C_mm = block_desc.empty()
    block_desc.diagonalize_ex(H_mm, C_mm, eps_M)

    # Collect eigenvectors on MASTER
    C_MM = local_desc.empty()
    redistributor2 = Redistributor(world, block_desc, local_desc)
    redistributor2.redistribute(C_mm, C_MM)

    # Return eigenvalues and -vectors on Master
    if world.rank == MASTER:
        return eps_M, C_MM
    else:
        return None, None
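
A sketch of how parallel_eigh might be driven, assuming the matrix was written with np.save beforehand and the script runs on eight ranks to match the default 4 x 2 grid (MASTER is rank 0):

# once, on a single rank:  np.save('H_MM.npy', H_MM)
eps_M, C_MM = parallel_eigh('H_MM.npy', blacsgrid=(4, 2), blocksize=64)
if world.rank == MASTER:
    print(eps_M)    # eigenvalues; C_MM holds the corresponding eigenvectors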
Example #12
def main(nbands=1000, mprocs=2, mb=64):
    # Set up BlacsGrid
    grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    nndesc = grid.new_descriptor(nbands, nbands, mb, mb)
    H_nn = nndesc.empty(
        dtype=float)  # outside the BlacsGrid these are size zero
    C_nn = nndesc.empty(
        dtype=float)  # outside the BlacsGrid these are size zero
    eps_N = np.empty((nbands),
                     dtype=float)  # replicated array on all MPI tasks
    # Fill ScaLAPACK array
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(nndesc, H_nn, alpha, beta, uplo)
    scalapack_zero(nndesc, H_nn, switch_uplo[uplo])

    t1 = time()
    # either interface will work; we recommend using the latter
    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L')
    nndesc.diagonalize_dc(H_nn.copy(), C_nn, eps_N)
    t2 = time()
    world.broadcast(eps_N, 0)  # all MPI tasks now have eps_N
    world.barrier()  # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', t2 - t1)

    # Create replicated NumPy array
    diagonal = np.eye(nbands, dtype=float)
    offdiagonal = np.tril(np.ones((nbands, nbands)), -1)
    H0 = beta * diagonal + alpha * offdiagonal
    E0 = np.empty((nbands), dtype=float)

    t1 = time()
    diagonalize(H0, E0)
    t2 = time()

    if rank == 0:
        print('LAPACK diagonalize', t2 - t1)

    delta = abs(E0 - eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
Example #13
def distribute_MM(wfs, a_MM):
    ksl = wfs.ksl
    if not ksl.using_blacs:
        return a_MM

    dtype = a_MM.dtype
    ksl_comm = ksl.block_comm
    NM = ksl.nao
    grid = BlacsGrid(ksl_comm, 1, 1)
    MM_descriptor = grid.new_descriptor(NM, NM, NM, NM)
    MM2mm = Redistributor(ksl_comm, MM_descriptor, ksl.mmdescriptor)
    if ksl_comm.rank != 0:
        a_MM = MM_descriptor.empty(dtype=dtype)

    a_mm = ksl.mmdescriptor.empty(dtype=dtype)
    MM2mm.redistribute(a_MM, a_mm)
    return a_mm
Example #14
def scal_diagonalize(A, nodes='master'):
    # Diagonalize matrix A (size N*N) with scalapack
    # Usage: eps, B = scal_diagonalize(A)
    # eps and B are the eigenvalues and eigenvectors
    # nodes = 'master': eigenvectors only available on master node
    # nodes = 'all': eigenvectors broadcast to all nodes

    # make sure A is N*N and Hermitian
    N = A.shape[0]
    assert A.shape[0] == A.shape[1]
    for i in range(N):
        for j in range(i, N):
            assert A[i, j] == A[j, i].conj()

    # create blacs descriptor
    mb = 64
    g = BlacsGrid(world, 2, size // 2)
    nndesc1 = g.new_descriptor(N, N, N, N)
    nndesc2 = g.new_descriptor(N, N, mb, mb)

    # distribute A to blacs grid A_
    if rank != 0:
        A = nndesc1.zeros(dtype=A.dtype)
    A_ = nndesc2.empty(dtype=A.dtype)
    redistributor = Redistributor(world, nndesc1, nndesc2)
    redistributor.redistribute(A, A_)

    # diagonalize
    B_ = nndesc2.zeros(dtype=A.dtype)
    eps = np.zeros(N, dtype=A.dtype)
    nndesc2.diagonalize_dc(A_, B_, eps, 'L')

    # distribute the eigenvectors to master
    B = np.zeros_like(A)
    redistributor = Redistributor(world, nndesc2, nndesc1)
    redistributor.redistribute(B_, B)

    if nodes == 'master':
        return eps, B
    elif nodes == 'all':
        if rank != 0:
            B = np.zeros((N, N), dtype=A.dtype)
        world.broadcast(B, 0)
        return eps, B
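
A usage sketch with illustrative sizes; note that scal_diagonalize as written assumes an even number of ranks (for its 2 x (size // 2) grid) and module-level rank/size names from gpaw.mpi:

import numpy as np
from gpaw.mpi import world

N = 128
A = np.zeros((N, N), dtype=complex)
if world.rank == 0:
    tmp = np.random.rand(N, N) + 1j * np.random.rand(N, N)
    A = tmp + tmp.T.conj()      # Hermitian by construction
world.broadcast(A, 0)           # every rank sees the same matrix
eps, B = scal_diagonalize(A, nodes='all')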
Example #15
def collect_wuMM(wfs, a_wuMM, w, s, k):
    # This function is based on
    # gpaw/wavefunctions/base.py: WaveFunctions.collect_auxiliary()

    dtype = a_wuMM[0][0].dtype

    ksl = wfs.ksl
    NM = ksl.nao
    kpt_rank, u = wfs.kd.get_rank_and_index(s, k)

    ksl_comm = ksl.block_comm

    if wfs.kd.comm.rank == kpt_rank:
        a_MM = a_wuMM[w][u]

        # Collect within blacs grid
        if ksl.using_blacs:
            a_mm = a_MM
            grid = BlacsGrid(ksl_comm, 1, 1)
            MM_descriptor = grid.new_descriptor(NM, NM, NM, NM)
            mm2MM = Redistributor(ksl_comm, ksl.mmdescriptor, MM_descriptor)

            a_MM = MM_descriptor.empty(dtype=dtype)
            mm2MM.redistribute(a_mm, a_MM)

        # The KSL master sends a_MM to the global master
        if ksl_comm.rank == 0:
            if kpt_rank == 0:
                assert wfs.world.rank == 0
                # I have it already
                return a_MM
            else:
                wfs.kd.comm.send(a_MM, 0, 2017)
                return None
    elif ksl_comm.rank == 0 and kpt_rank != 0:
        assert wfs.world.rank == 0
        a_MM = np.empty((NM, NM), dtype=dtype)
        wfs.kd.comm.receive(a_MM, kpt_rank, 2017)
        return a_MM
Example #16
# See trunk/gpaw/blacs.py for some discussion of
# these idiosyncrasies.
import numpy as np

from gpaw.blacs import BlacsGrid, parallelprint
from gpaw.mpi import world
from gpaw.utilities.scalapack import pblas_simple_gemm

gen = np.random.RandomState(42)

# simulate state-parallelization=2 and
# domain-decomposition.prod=32
B = 2
D = 32
mb = 32
grid = BlacsGrid(world, B, D)

nbands = 500
nG = 80**3

nGdesc = grid.new_descriptor(nbands, nG, nbands // B, nG // D)
nndesc = grid.new_descriptor(nbands, nbands, mb, mb)

psit_nG = gen.rand(*nGdesc.shape)
A_nn = gen.rand(*nndesc.shape)

assert nGdesc.check(psit_nG)
assert nndesc.check(A_nn)

parallelprint(world, (A_nn.shape, nndesc.shape, nndesc.lld))
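
pblas_simple_gemm is imported above but never called; a plausible continuation (a sketch, not part of the original snippet) would form the product A_nn . psit_nG on the same grid:

# C_nG = A_nn . psit_nG  ((nbands x nbands) times (nbands x nG))
C_nG = nGdesc.zeros(dtype=float)
pblas_simple_gemm(nndesc, nGdesc, nGdesc, A_nn, psit_nG, C_nG)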
Example #17
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make HEA0 Hermitian
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use the upper triangle
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
        print('hemm_err', hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
Example #18
    def calculate_rkernel(self):

        gd = self.gd
        ng_c = gd.N_c
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        ns = self.calc.wfs.nspins
        n_g = self.n_g  # density on rough grid

        fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
        qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
        flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
        Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

        ng = np.prod(ng_c)  # number of grid points
        r_vg = gd.get_grid_point_coordinates()
        rx_g = r_vg[0].flatten()
        ry_g = r_vg[1].flatten()
        rz_g = r_vg[2].flatten()

        prnt('    %d grid points and %d plane waves at the Gamma point' %
             (ng, self.pd.ngmax),
             file=self.fd)

        # Unit cells
        R_Rv = []
        weight_R = []
        nR_v = self.unit_cells
        nR = np.prod(nR_v)
        for i in range(-nR_v[0] + 1, nR_v[0]):
            for j in range(-nR_v[1] + 1, nR_v[1]):
                for h in range(-nR_v[2] + 1, nR_v[2]):
                    R_Rv.append(i * cell_cv[0] + j * cell_cv[1] +
                                h * cell_cv[2])
                    weight_R.append((nR_v[0] - abs(i)) * (nR_v[1] - abs(j)) *
                                    (nR_v[2] - abs(h)) / float(nR))
        if nR > 1:
            # with more than one unit cell only the exchange kernel is
            # calculated on the grid. The bare Coulomb kernel is added
            # in PW basis and Vlocal_g only the exchange part
            dv = self.calc.density.gd.dv
            gc = (3 * dv / 4 / np.pi)**(1 / 3.)
            Vlocal_g -= 2 * np.pi * gc**2 / dv
            prnt('    Lattice point sampling: ' + '(%s x %s x %s)^2 ' %
                 (nR_v[0], nR_v[1], nR_v[2]) +
                 ' Reduced to %s lattice points' % len(R_Rv),
                 file=self.fd)

        l_g_size = -(-ng // mpi.world.size)
        l_g_range = range(mpi.world.rank * l_g_size,
                          min((mpi.world.rank + 1) * l_g_size, ng))

        fhxc_qsGr = {}
        for iq in range(len(self.ibzq_qc)):
            fhxc_qsGr[iq] = np.zeros(
                (ns, len(self.pd.G2_qG[iq]), len(l_g_range)), dtype=complex)

        inv_error = np.seterr()
        np.seterr(invalid='ignore')
        np.seterr(divide='ignore')

        t0 = time()
        # Loop over Lattice points
        for i, R_v in enumerate(R_Rv):
            # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
            if i == 1:
                prnt('      Finished 1 cell in %s seconds' % int(time() - t0) +
                     ' - estimated %s seconds left' % int(
                         (len(R_Rv) - 1) * (time() - t0)),
                     file=self.fd)
                self.fd.flush()
            if len(R_Rv) > 5:
                if (i + 1) % (len(R_Rv) // 5 + 1) == 0:
                    prnt('      Finished %s cells in %s seconds' %
                         (i, int(time() - t0)) +
                         ' - estimated %s seconds left' % int(
                             (len(R_Rv) - i) * (time() - t0) / i),
                         file=self.fd)
                    self.fd.flush()
            for g in l_g_range:
                rx = rx_g[g] + R_v[0]
                ry = ry_g[g] + R_v[1]
                rz = rz_g[g] + R_v[2]

                # |r-r'-R_i|
                rr = ((r_vg[0] - rx)**2 + (r_vg[1] - ry)**2 +
                      (r_vg[2] - rz)**2)**0.5

                n_av = (n_g + n_g.flatten()[g]) / 2.
                fx_g = ns * self.get_fxc_g(n_av, index=g)
                qc_g = (-4 * np.pi * ns / fx_g)**0.5
                x = qc_g * rr
                osc_x = np.sin(x) - x * np.cos(x)
                f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
                if nR > 1:  # include only exchange part of the kernel here
                    V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
                else:  # include the full kernel (also hartree part)
                    V_rr = (sici(x)[0] * 2 / np.pi) / rr

                # Terms with r = r'
                if (np.abs(R_v) < 0.001).all():
                    tmp_flat = f_rr.flatten()
                    tmp_flat[g] = flocal_g.flatten()[g]
                    f_rr = tmp_flat.reshape(ng_c)
                    tmp_flat = V_rr.flatten()
                    tmp_flat[g] = Vlocal_g.flatten()[g]
                    V_rr = tmp_flat.reshape(ng_c)
                    del tmp_flat

                f_rr[np.where(n_av < self.density_cut)] = 0.0
                V_rr[np.where(n_av < self.density_cut)] = 0.0

                f_rr *= weight_R[i]
                V_rr *= weight_R[i]

                # r-r'-R_i
                r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

                # Fourier transform of r
                for iq, q in enumerate(self.ibzq_qc):
                    q_v = np.dot(q, icell_cv)
                    e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                    f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                    if ns == 2:
                        f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                        fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

        mpi.world.barrier()

        np.seterr(**inv_error)

        for iq, q in enumerate(self.ibzq_qc):
            npw = len(self.pd.G2_qG[iq])
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
            l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))

            if mpi.world.size > 1:
                # redistribute grid and plane waves in fhxc_qsGr[iq]
                bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
                bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
                bd1 = bg1.new_descriptor(npw, ng, npw,
                                         -(-ng // mpi.world.size))
                bd2 = bg2.new_descriptor(npw, ng, -(-npw // mpi.world.size),
                                         ng)

                fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
                if ns == 2:
                    Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

                r = Redistributor(bg1.comm, bd1, bd2)
                r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
                if ns == 2:
                    r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
            else:
                fhxc_Glr = fhxc_qsGr[iq][0]
                if ns == 2:
                    Koff_Glr = fhxc_qsGr[iq][1]

            # Fourier transform of r'
            for iG in range(len(l_pw_range)):
                f_g = fhxc_Glr[iG].reshape(ng_c)
                f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
                fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
                if ns == 2:
                    v_g = Koff_Glr[iG].reshape(ng_c)
                    v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                    fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

            if ns == 2:  # f_00 = f_11 and f_01 = f_10
                fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
                fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                if nR > 1:  # add Hartree kernel evaluated in PW basis
                    Gq2_G = self.pd.G2_qG[iq]
                    if (q == 0).all():
                        Gq2_G[0] = 1.
                    vq_G = 4 * np.pi / Gq2_G
                    fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)
Example #19
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be positive semi-definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but would end up with garbage on
    # the other half of the triangle when we redistribute.
    # This is fine because ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol
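
A driver sketch for this test; the 2 x 2 grid implies four MPI ranks, and rank, world and tol are assumed to be module-level names (with rank == world.rank):

if __name__ == '__main__':
    # run as, e.g.: mpirun -np 4 python this_test.py
    main(N=72, dtype=float)
    main(N=72, dtype=complex)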
Example #20
    def __init__(self, sl_lrtddft, nrows, lr_comms):
        self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)

        self.lr_comms = lr_comms

        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        #
        # -----------------------------------------------------------------
        # matrix

        # original grid, i.e., how the matrix is stored
        self.orig_matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                          self.lr_comms.dd_comm.size,
                                          self.lr_comms.eh_comm.size)

        # solve grid
        self.solve_matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                           self.mprocs, self.nprocs)

        # M = rows, N = cols
        M = nrows * 4
        N = nrows * 4
        mb = 4
        nb = 4
        self.orig_matrix_descr = self.orig_matrix_grid.new_descriptor(
            N, M, nb, mb)

        bs = self.block_size
        self.solve_matrix_descr = self.solve_matrix_grid.new_descriptor(
            N, M, bs, bs)

        self.matrix_in_redist = Redistributor(self.lr_comms.parent_comm,
                                              self.orig_matrix_descr,
                                              self.solve_matrix_descr)

        # -----------------------------------------------------------------
        # vector

        # original grid, i.e., how the vector is stored
        self.orig_vector_grid = BlacsGrid(
            self.lr_comms.parent_comm, 1,
            (self.lr_comms.dd_comm.size * self.lr_comms.eh_comm.size))

        # solve grid
        #self.solve_vector_grid = BlacsGrid(self.lr_comms.parent_comm, self.mprocs, self.nprocs)

        # M = rows, N = cols
        M = nrows * 4
        Nrhs = 1
        mb = 4
        nb = 1
        self.orig_vector_descr = self.orig_vector_grid.new_descriptor(
            Nrhs, M, nb, mb)

        bs = self.block_size
        self.solve_vector_descr = self.solve_matrix_grid.new_descriptor(
            Nrhs, M, 1, bs)

        self.vector_in_redist = Redistributor(self.lr_comms.parent_comm,
                                              self.orig_vector_descr,
                                              self.solve_vector_descr)

        self.vector_out_redist = Redistributor(self.lr_comms.parent_comm,
                                               self.solve_vector_descr,
                                               self.orig_vector_descr)
Example #21
import numpy as np
from gpaw.utilities.elpa import LibElpa
from gpaw.blacs import BlacsGrid
from gpaw.mpi import world

rng = np.random.RandomState(87878787)

if world.size == 1:
    shape = 1, 1
else:
    shape = world.size // 2, 2
bg = BlacsGrid(world, *shape)

M = 8
blocksize = 2

desc = bg.new_descriptor(M, M, blocksize, blocksize)
sdesc = desc.as_serial()

Aserial = sdesc.zeros()
if world.rank == 0:
    Aserial[:] = rng.rand(*Aserial.shape)
    Aserial += Aserial.T.copy()
A = desc.distribute_from_master(Aserial)
C1 = desc.zeros()
C2 = desc.zeros()
eps1 = np.zeros(M)
eps2 = np.zeros(M)

elpa = LibElpa(desc)
print(elpa)
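
The script allocates C1, C2, eps1 and eps2 but stops after printing the solver object; a sketch of the intended comparison, assuming LibElpa exposes a diagonalize(A, C, eps) method, would be:

elpa.diagonalize(A.copy(), C1, eps1)       # ELPA
desc.diagonalize_dc(A.copy(), C2, eps2)    # ScaLAPACK divide-and-conquer
if world.rank == 0:
    assert np.allclose(eps1, eps2)         # eigenvalues must agree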
Example #22
    def __init__(self, sl_lrtddft, nkq, dd_comm, eh_comm):
        mcpus, ncpus, blocksize = tuple(sl_lrtddft)
        self.world = eh_comm.parent
        self.dd_comm = dd_comm
        if self.world is None:
            self.world = self.dd_comm

        # All ranks within the domain communicator contain the omega matrix;
        # construct the new communicator only on domain masters.
        eh_ranks = np.arange(eh_comm.size) * dd_comm.size
        self.eh_comm2 = self.world.new_communicator(eh_ranks)

        self.eh_grid = BlacsGrid(self.eh_comm2, eh_comm.size, 1)
        self.eh_descr = self.eh_grid.new_descriptor(nkq, nkq, 1, nkq)
        self.diag_grid = BlacsGrid(self.world, mcpus, ncpus)
        self.diag_descr = self.diag_grid.new_descriptor(
            nkq, nkq, blocksize, blocksize)

        self.redistributor_in = Redistributor(self.world, self.eh_descr,
                                              self.diag_descr)
        self.redistributor_out = Redistributor(self.world, self.diag_descr,
                                               self.eh_descr)
        """
        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        # -----------------------------------------------------------------
        # M = rows, N = cols
        M = nkq*4; N = nkq*4; mb = nkq*4; nb = 4; Nrhs = 1
        # Matrix, mp=1, np=eh_comm.size
        self.eh_grid2a = BlacsGrid(self.eh_comm2, eh_comm.size, 1)
        # Vector, mp=eh_comm.size, np=1
        self.eh_grid2b = BlacsGrid(self.eh_comm2, 1, eh_comm.size)
        self.eh_descr2a = self.eh_grid2a.new_descriptor(N,    M,  nb, mb)
        self.eh_descr2b = self.eh_grid2b.new_descriptor(Nrhs, N,   1, nb)

        self.solve_descr2a =self.diag_grid.new_descriptor(N, M,
                                                          blocksize, blocksize)
        self.solve_descr2b =self.diag_grid.new_descriptor(Nrhs, N,
                                                          1, blocksize)

        self.redist_solve_in_2a = Redistributor(self.world,
                                                self.eh_descr2a,
                                                self.solve_descr2a)
        self.redist_solve_in_2b = Redistributor(self.world,
                                                self.eh_descr2b,
                                                self.solve_descr2b)

        self.redist_solve_out_2a = Redistributor(self.world,
                                                 self.solve_descr2a,
                                                 self.eh_descr2a)
        self.redist_solve_out_2b = Redistributor(self.world,
                                                 self.solve_descr2b,
                                                 self.eh_descr2b)
        """

        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        # -----------------------------------------------------------------
        # M = rows, N = cols
        M = nkq * 4
        N = nkq * 4
        mb = 4
        nb = 4
        Nrhs = 1
        # Matrix, mp=1, np=eh_comm.size
        self.eh_grid2a = BlacsGrid(self.world, dd_comm.size, eh_comm.size)
        # Vector, mp=eh_comm.size, np=1
        self.eh_grid2b = BlacsGrid(self.world, 1, dd_comm.size * eh_comm.size)
        self.eh_descr2a = self.eh_grid2a.new_descriptor(N, M, nb, mb)
        self.eh_descr2b = self.eh_grid2b.new_descriptor(Nrhs, N, Nrhs, nb)
        self.solve_descr2a = self.diag_grid.new_descriptor(
            N, M, blocksize, blocksize)
        self.solve_descr2b = self.diag_grid.new_descriptor(
            Nrhs, N, Nrhs, blocksize)

        self.redist_solve_in_2a = Redistributor(self.world, self.eh_descr2a,
                                                self.solve_descr2a)
        self.redist_solve_in_2b = Redistributor(self.world, self.eh_descr2b,
                                                self.solve_descr2b)

        self.redist_solve_out_2a = Redistributor(self.world,
                                                 self.solve_descr2a,
                                                 self.eh_descr2a)
        self.redist_solve_out_2b = Redistributor(self.world,
                                                 self.solve_descr2b,
                                                 self.eh_descr2b)
Example #23
    def diagonalize_full_hamiltonian(self, ham, atoms, occupations, txt,
                                     nbands=None, scalapack=None, expert=False):
        assert self.dtype == complex

        if nbands is None:
            nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
        else:
            assert nbands <= self.pd.ngmin

        if expert:
            iu = nbands
        else:
            iu = None

        self.bd = bd = BandDescriptor(nbands, self.bd.comm)

        p = functools.partial(print, file=txt)
        p('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
        p('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                    self.pd.ngmax))
        mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
        p('Approximate memory usage per core: {0:.3f} MB'.format(mem))
        if bd.comm.size > 1:
            if isinstance(scalapack, (list, tuple)):
                nprow, npcol, b = scalapack
            else:
                nprow = int(round(bd.comm.size**0.5))
                while bd.comm.size % nprow != 0:
                    nprow -= 1
                npcol = bd.comm.size // nprow
                b = 64
            p('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
              'block-size:', b)
            bg = BlacsGrid(bd.comm, bd.comm.size, 1)
            bg2 = BlacsGrid(bd.comm, nprow, npcol)
            scalapack = True
        else:
            nprow = npcol = 1
            scalapack = False

        self.pt.set_positions(atoms.get_scaled_positions())
        self.kpt_u[0].P_ani = None
        self.allocate_arrays_for_projections(self.pt.my_atom_indices)

        myslice = bd.get_slice()

        pb = ProgressBar(txt)
        nkpt = len(self.kpt_u)
        
        for u, kpt in enumerate(self.kpt_u):
            pb.update(u / nkpt)
            npw = len(self.pd.Q_qG[kpt.q])
            if scalapack:
                mynpw = -(-npw // bd.comm.size)
                md = BlacsDescriptor(bg, npw, npw, mynpw, npw)
                md2 = BlacsDescriptor(bg2, npw, npw, b, b)
            else:
                md = md2 = MatrixDescriptor(npw, npw)

            with self.timer('Build H and S'):
                H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

            if scalapack:
                r = Redistributor(bd.comm, md, md2)
                H_GG = r.redistribute(H_GG)
                S_GG = r.redistribute(S_GG)

            psit_nG = md2.empty(dtype=complex)
            eps_n = np.empty(npw)

            with self.timer('Diagonalize'):
                if not scalapack:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n, 
                                               iu=iu)
                else:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
            del H_GG, S_GG

            kpt.eps_n = eps_n[myslice].copy()

            if scalapack:
                md3 = BlacsDescriptor(bg, npw, npw, bd.mynbands, npw)
                r = Redistributor(bd.comm, md2, md3)
                psit_nG = r.redistribute(psit_nG)

            kpt.psit_nG = psit_nG[:bd.mynbands].copy()
            del psit_nG

            with self.timer('Projections'):
                self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

            kpt.f_n = None

        pb.finish()
        
        occupations.calculate(self)
Example #24
    def calculate_forces(self, hamiltonian, F_av):
        self.timer.start('LCAO forces')

        spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
        ksl = self.ksl
        nao = ksl.nao
        mynao = ksl.mynao
        nq = len(self.kd.ibzk_qc)
        dtype = self.dtype
        tci = self.tci
        gd = self.gd
        bfs = self.basis_functions

        Mstart = ksl.Mstart
        Mstop = ksl.Mstop

        from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
        isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

        if not isblacs:
            self.timer.start('TCI derivative')
            dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dPdR_aqvMi = {}
            for a in self.basis_functions.my_atom_indices:
                ni = self.setups[a].ni
                dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
            tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                     dPdR_aqvMi)
            gd.comm.sum(dThetadR_qvMM)
            gd.comm.sum(dTdR_qvMM)
            self.timer.stop('TCI derivative')

            my_atom_indices = bfs.my_atom_indices
            atom_indices = bfs.atom_indices

            def _slices(indices):
                for a in indices:
                    M1 = bfs.M_a[a] - Mstart
                    M2 = M1 + self.setups[a].nao
                    if M2 > 0:
                        yield a, max(0, M1), M2

            def slices():
                return _slices(atom_indices)

            def my_slices():
                return _slices(my_atom_indices)

        #
        #         -----                    -----
        #          \    -1                  \    *
        # E      =  )  S     H    rho     =  )  c     eps  f  c
        #  mu nu   /    mu x  x z    z nu   /    n mu    n  n  n nu
        #         -----                    -----
        #          x z                       n
        #
        # We use the transpose of that matrix.  The first form is used
        # if rho is given, otherwise the coefficients are used.
        self.timer.start('Initial')

        rhoT_uMM = []
        ET_uMM = []

        if not isblacs:
            if self.kpt_u[0].rho_MM is None:
                self.timer.start('Get density matrix')
                for kpt in self.kpt_u:
                    rhoT_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n, kpt.C_nM)
                    rhoT_uMM.append(rhoT_MM)
                    ET_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n * kpt.eps_n, kpt.C_nM)
                    ET_uMM.append(ET_MM)

                    if hasattr(kpt, 'c_on'):
                        # XXX does this work with BLACS/non-BLACS/etc.?
                        assert self.bd.comm.size == 1
                        d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                        dtype=kpt.C_nM.dtype)
                        for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                            d_nn += ne * np.outer(c_n.conj(), c_n)
                        rhoT_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn, kpt.C_nM)
                        ET_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn * kpt.eps_n, kpt.C_nM)
                self.timer.stop('Get density matrix')
            else:
                rhoT_uMM = []
                ET_uMM = []
                for kpt in self.kpt_u:
                    H_MM = self.eigensolver.calculate_hamiltonian_matrix(\
                        hamiltonian, self, kpt)
                    tri2full(H_MM)
                    S_MM = kpt.S_MM.copy()
                    tri2full(S_MM)
                    ET_MM = np.linalg.solve(S_MM,
                                            gemmdot(H_MM,
                                                    kpt.rho_MM)).T.copy()
                    del S_MM, H_MM
                    rhoT_MM = kpt.rho_MM.T.copy()
                    rhoT_uMM.append(rhoT_MM)
                    ET_uMM.append(ET_MM)
        self.timer.stop('Initial')

        if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
            from gpaw.blacs import BlacsGrid, Redistributor

            def get_density_matrix(f_n, C_nM, redistributor):
                rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                               C_nM).conj()
                rho_mm = redistributor.redistribute(rho1_mm)
                return rho_mm

            pcutoff_a = [
                max([pt.get_cutoff() for pt in setup.pt_j])
                for setup in self.setups
            ]
            phicutoff_a = [
                max([phit.get_cutoff() for phit in setup.phit_j])
                for setup in self.setups
            ]

            # XXX should probably use bdsize x gdsize instead
            # That would be consistent with some existing grids
            grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                             self.bd.comm.size)

            blocksize1 = -(-nao // grid.nprow)
            blocksize2 = -(-nao // grid.npcol)
            # XXX what are rows and columns actually?
            desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

            rhoT_umm = []
            ET_umm = []
            redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                self.timer.start('Get density matrix')
                rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
                rhoT_umm.append(rhoT_mm)
                self.timer.stop('Get density matrix')

                self.timer.start('Potential')
                rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_mM, kpt.q)
                del rhoT_mM
                self.timer.stop('Potential')

            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                           redistributor)
                ET_umm.append(ET_mm)
            self.timer.stop('Get density matrix')

            M1start = blocksize1 * grid.myrow
            M2start = blocksize2 * grid.mycol

            M1stop = min(M1start + blocksize1, nao)
            M2stop = min(M2start + blocksize2, nao)

            m1max = M1stop - M1start
            m2max = M2stop - M2start
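            # m1max, m2max: dimensions of the single local block owned by
            # this rank.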

        if not isblacs:
            # Kinetic energy contribution
            #
            #           ----- d T
            #  a         \       mu nu
            # F += 2 Re   )   -------- rho
            #            /    d R         nu mu
            #           -----    mu nu
            #        mu in a; nu
            #
            Fkin_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] *
                                rhoT_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Fkin_av[a, :] += \
                        2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM

            # Density matrix contribution due to basis overlap
            #
            #            ----- d Theta
            #  a          \           mu nu
            # F  += -2 Re  )   ------------  E
            #             /        d R        nu mu
            #            -----        mu nu
            #         mu in a; nu
            #
            Ftheta_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dThetadRE_vMM = (dThetadR_qvMM[kpt.q] *
                                 ET_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Ftheta_av[a, :] += \
                        -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

        if isblacs:
            from gpaw.lcao.overlap import TwoCenterIntegralCalculator
            self.timer.start('Prepare TCI loop')
            M_a = bfs.M_a

            Fkin2_av = np.zeros_like(F_av)
            Ftheta2_av = np.zeros_like(F_av)

            cell_cv = tci.atoms.cell
            spos_ac = tci.atoms.get_scaled_positions() % 1.0

            overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                      derivative=False)

            # XXX this is not parallel *AT ALL*.
            self.timer.start('Get neighbors')
            nl = tci.atompairs.pairs.neighbors
            r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
            atompairs = sorted(r_and_offset_aao)
            self.timer.stop('Get neighbors')

            T_expansions = tci.T_expansions
            Theta_expansions = tci.Theta_expansions
            P_expansions = tci.P_expansions
            nq = len(self.kd.ibzk_qc)

            dH_asp = hamiltonian.dH_asp

            self.timer.start('broadcast dH')
            alldH_asp = {}
            for a in range(len(self.setups)):
                gdrank = bfs.sphere_a[a].rank
                if gdrank == gd.rank:
                    dH_sp = dH_asp[a]
                else:
                    ni = self.setups[a].ni
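                    # Packed storage of the symmetric ni x ni atomic
                    # Hamiltonian: ni * (ni + 1) // 2 elements per spin.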
                    dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                gd.comm.broadcast(dH_sp, gdrank)
                # okay, now everyone gets copies of dH_sp
                alldH_asp[a] = dH_sp
            self.timer.stop('broadcast dH')

            # This will get sort of hairy.  We need to account for some
            # three-center overlaps, such as:
            #
            #         a1
            #      Phi   ~a3    a3  ~a3     a2     a2,a1
            #   < ----  |p  > dH   <p   |Phi  > rho
            #      dR
            #
            # To this end we will loop over all pairs of atoms (a1, a3),
            # and then a sub-loop over (a3, a2).
            from gpaw.lcao.overlap import DerivativeAtomicDisplacement

            class Displacement(DerivativeAtomicDisplacement):
                def __init__(self, a1, a2, R_c, offset):
                    phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                    offset)
                    DerivativeAtomicDisplacement.__init__(
                        self, None, a1, a2, R_c, offset, phases)

            # Cache of Displacement objects with evaluated spherical
            # harmonics.
            disp_aao = {}

            def get_displacements(a1, a2, maxdistance):
                # XXX the way maxdistance is handled it can lead to
                # bad caching when different maxdistances are passed
                # to subsequent calls with same pair of atoms
                disp_o = disp_aao.get((a1, a2))
                if disp_o is None:
                    disp_o = []
                    for R_c, offset in r_and_offset_aao[(a1, a2)]:
                        if np.linalg.norm(R_c) > maxdistance:
                            continue
                        disp = Displacement(a1, a2, R_c, offset)
                        disp_o.append(disp)
                    disp_aao[(a1, a2)] = disp_o
                return [disp for disp in disp_o if disp.r < maxdistance]

            self.timer.stop('Prepare TCI loop')
            self.timer.start('Not so complicated loop')

            for (a1, a2) in atompairs:
                if a1 >= a2:
                    # Actually this leads to bad load balance.
                    # We should take a1 > a2 or a1 < a2 equally many times.
                    # Maybe decide between these choices depending on
                    # whether a2 % 2 == 0.
                    continue

                m1start = M_a[a1] - M1start
                m2start = M_a[a2] - M2start
                if m1start >= blocksize1 or m2start >= blocksize2:
                    continue  # (we have only one block per CPU)

                T_expansion = T_expansions.get(a1, a2)
                Theta_expansion = Theta_expansions.get(a1, a2)
                #P_expansion = P_expansions.get(a1, a2)
                nm1, nm2 = T_expansion.shape

                m1stop = min(m1start + nm1, m1max)
                m2stop = min(m2start + nm2, m2max)

                if m1stop <= 0 or m2stop <= 0:
                    continue

                m1start = max(m1start, 0)
                m2start = max(m2start, 0)
                J1start = max(0, M1start - M_a[a1])
                J2start = max(0, M2start - M_a[a2])
                J1stop = J1start + m1stop - m1start
                J2stop = J2start + m2stop - m2start

                dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
                dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

                disp_o = get_displacements(a1, a2,
                                           phicutoff_a[a1] + phicutoff_a[a2])
                for disp in disp_o:
                    disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                    disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    Fkin_v = 2.0 * (
                        dTdR_qvmm[kpt.q][:, J1start:J1stop, J2start:J2stop] *
                        rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:J1stop,
                                                           J2start:J2stop] *
                                      ET_mm[np.newaxis]).real.sum(-1).sum(-1)
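                    # Equal and opposite contributions on a1 and a2 keep the
                    # total force translationally invariant.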
                    Fkin2_av[a1] += Fkin_v
                    Fkin2_av[a2] -= Fkin_v
                    Ftheta2_av[a1] -= Ftheta_v
                    Ftheta2_av[a2] += Ftheta_v

            Fkin_av = Fkin2_av
            Ftheta_av = Ftheta2_av
            self.timer.stop('Not so complicated loop')

            dHP_and_dSP_aauim = {}
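            # Cache of dH P and dS P products per (a2, a3) pair; reused for
            # every a1 in the loop below.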

            a2values = {}
            for (a2, a3) in atompairs:
                if a3 not in a2values:
                    a2values[a3] = []
                a2values[a3].append(a2)

            Fatom_av = np.zeros_like(F_av)
            Frho_av = np.zeros_like(F_av)
            self.timer.start('Complicated loop')
            for a1, a3 in atompairs:
                if a1 == a3:
                    # Functions reside on same atom, so their overlap
                    # does not change when atom is displaced
                    continue
                m1start = M_a[a1] - M1start
                if m1start >= blocksize1:
                    continue

                P_expansion = P_expansions.get(a1, a3)
                nm1 = P_expansion.shape[0]
                m1stop = min(m1start + nm1, m1max)
                if m1stop <= 0:
                    continue

                m1start = max(m1start, 0)
                J1start = max(0, M1start - M_a[a1])
                J1stop = J1start + m1stop - m1start

                disp_o = get_displacements(a1, a3,
                                           phicutoff_a[a1] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
                for disp in disp_o:
                    disp.evaluate_overlap(P_expansion, dPdR_qvmi)

                dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
                for a2 in a2values[a3]:
                    m2start = M_a[a2] - M2start
                    if m2start >= blocksize2:
                        continue

                    P_expansion2 = P_expansions.get(a2, a3)
                    nm2 = P_expansion2.shape[0]
                    m2stop = min(m2start + nm2, m2max)
                    if m2stop <= 0:
                        continue

                    disp_o = get_displacements(a2, a3,
                                               phicutoff_a[a2] + pcutoff_a[a3])
                    if len(disp_o) == 0:
                        continue

                    m2start = max(m2start, 0)
                    J2start = max(0, M2start - M_a[a2])
                    J2stop = J2start + m2stop - m2start

                    if (a2, a3) in dHP_and_dSP_aauim:
                        dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                    else:
                        P_qmi = P_expansion2.zeros((nq, ), dtype=dtype)
                        for disp in disp_o:
                            disp.evaluate_direct(P_expansion2, P_qmi)
                        P_qmi = P_qmi[:, J2start:J2stop].copy()
                        dH_sp = alldH_asp[a3]
                        dS_ii = self.setups[a3].dO_ii

                        dHP_uim = []
                        dSP_uim = []
                        for u, kpt in enumerate(self.kpt_u):
                            dH_ii = unpack(dH_sp[kpt.s])
                            dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                            # XXX only need nq of these
                            dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                            dHP_uim.append(dHP_im)
                            dSP_uim.append(dSP_im)
                        dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                    for u, kpt in enumerate(self.kpt_u):
                        rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                        ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                        dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                        dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                        Fatom_c = 2.0 * (dPdRdHP_vmm *
                                         rhoT_mm).real.sum(-1).sum(-1)
                        Frho_c = 2.0 * (dPdRdSP_vmm *
                                        ET_mm).real.sum(-1).sum(-1)
                        Fatom_av[a1] += Fatom_c
                        Fatom_av[a3] -= Fatom_c

                        Frho_av[a1] -= Frho_c
                        Frho_av[a3] += Frho_c

            self.timer.stop('Complicated loop')

        if not isblacs:
            # Potential contribution
            #
            #           -----      /  d Phi  (r)
            #  a         \        |        mu    ~
            # F += -2 Re  )       |   ---------- v (r)  Phi  (r) dr rho
            #            /        |     d R                nu          nu mu
            #           -----    /         a
            #        mu in a; nu
            #
            self.timer.start('Potential')
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_uMM[u], kpt.q)
            self.timer.stop('Potential')

            # Density matrix contribution from PAW correction
            #
            #           -----                        -----
            #  a         \      a                     \     b
            # F +=  2 Re  )    Z      E        - 2 Re  )   Z      E
            #            /      mu nu  nu mu          /     mu nu  nu mu
            #           -----                        -----
            #           mu nu                    b; mu in a; nu
            #
            # with
            #                  b*
            #         -----  dP
            #   b      \       i mu    b   b
            #  Z     =  )   -------- dS   P
            #   mu nu  /     dR        ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Paw correction')
            Frho_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                work_MM = np.zeros((mynao, nao), dtype)
                ZE_MM = None
                for b in my_atom_indices:
                    setup = self.setups[b]
                    dO_ii = np.asarray(setup.dO_ii, dtype)
                    dOP_iM = np.zeros((setup.ni, nao), dtype)
                    gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                    for v in range(3):
                        gemm(1.0, dOP_iM,
                             dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop], 0.0,
                             work_MM, 'n')
                        ZE_MM = (work_MM * ET_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ZE_MM[M1:M2].sum()
                            Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                            Frho_av[b, v] += dE  # the "mu nu" term
            del work_MM, ZE_MM
            self.timer.stop('Paw correction')

            # Atomic density contribution
            #            -----                         -----
            #  a          \     a                       \     b
            # F  += -2 Re  )   A      rho       + 2 Re   )   A      rho
            #             /     mu nu    nu mu          /     mu nu    nu mu
            #            -----                         -----
            #            mu nu                     b; mu in a; nu
            #
            #                  b*
            #         ----- d P
            #  b       \       i mu   b   b
            # A     =   )   ------- dH   P
            #  mu nu   /    d R       ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Atomic Hamiltonian force')
            Fatom_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                for b in my_atom_indices:
                    H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                      dtype)
                    HP_iM = gemmdot(
                        H_ii,
                        np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                    for v in range(3):
                        dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                        ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ArhoT_MM[M1:M2].sum()
                            Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                            Fatom_av[b, v] -= dE  # the "mu nu" term
            self.timer.stop('Atomic Hamiltonian force')

        F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
        self.timer.start('Wait for sum')
        ksl.orbital_comm.sum(F_av)
        if self.bd.comm.rank == 0:
            self.kd.comm.sum(F_av, 0)
        self.timer.stop('Wait for sum')
        self.timer.stop('LCAO forces')
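
For reference, a minimal serial sketch of what the non-BLACS branch's ksl.get_transposed_density_matrix produces (the helper name and test values here are illustrative, not GPAW API): the transpose of rho_{mu nu} = sum_n f_n C*_{n mu} C_{n nu}.

import numpy as np

def transposed_density_matrix(f_n, C_nM):
    # rho_{mu nu} = sum_n f_n conj(C_{n mu}) C_{n nu}; returned transposed,
    # matching the rhoT_MM layout used by the force expressions above.
    rho_MM = (C_nM.conj().T * f_n) @ C_nM
    return rho_MM.T.copy()

# Tiny check: trace(rho) = sum_n f_n sum_mu |C_{n mu}|^2.
rng = np.random.default_rng(0)
C_nM = rng.standard_normal((3, 5)) + 1j * rng.standard_normal((3, 5))
f_n = np.array([2.0, 2.0, 0.0])
rhoT_MM = transposed_density_matrix(f_n, C_nM)
expected = (f_n * (np.abs(C_nM) ** 2).sum(axis=1)).sum()
assert abs(np.trace(rhoT_MM) - expected) < 1e-10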
Example #25
0
    def diagonalize_full_hamiltonian(self, ham, atoms, occupations, log,
                                     nbands=None, ecut=None, scalapack=None,
                                     expert=False):

        if self.dtype != complex:
            raise ValueError('Your wavefunctions are not complex as '
                             'required by the PW diagonalization routine.\n'
                             'Please supply GPAW(..., dtype=complex, ...) '
                             'as an argument to the calculator to enforce '
                             'complex wavefunctions.')

        if nbands is None and ecut is None:
            nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
        elif nbands is None:
            ecut /= units.Hartree
            vol = abs(np.linalg.det(self.gd.cell_cv))
            nbands = int(vol * ecut**1.5 * 2**0.5 / 3 / pi**2)
        else:
            assert nbands <= self.pd.ngmin

        if expert:
            iu = nbands
        else:
            iu = None

        self.bd = bd = BandDescriptor(nbands, self.bd.comm)

        log('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
        log('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                      self.pd.ngmax))
        mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
        log('Approximate memory usage per core: {0:.3f} MB'.format(mem))
        if bd.comm.size > 1:
            if isinstance(scalapack, (list, tuple)):
                nprow, npcol, b = scalapack
            else:
                nprow = int(round(bd.comm.size**0.5))
                while bd.comm.size % nprow != 0:
                    nprow -= 1
                npcol = bd.comm.size // nprow
                b = 64
            log('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
                'block-size:', b)
            bg = BlacsGrid(bd.comm, bd.comm.size, 1)
            bg2 = BlacsGrid(bd.comm, nprow, npcol)
            scalapack = True
        else:
            nprow = npcol = 1
            scalapack = False

        self.set_positions(atoms.get_scaled_positions())
        self.kpt_u[0].P_ani = None
        self.allocate_arrays_for_projections(self.pt.my_atom_indices)

        myslice = bd.get_slice()

        pb = ProgressBar(log.fd)
        nkpt = len(self.kpt_u)

        for u, kpt in enumerate(self.kpt_u):
            pb.update(u / nkpt)
            npw = len(self.pd.Q_qG[kpt.q])
            if scalapack:
                mynpw = -(-npw // bd.comm.size)
                md = BlacsDescriptor(bg, npw, npw, mynpw, npw)
                md2 = BlacsDescriptor(bg2, npw, npw, b, b)
            else:
                md = md2 = MatrixDescriptor(npw, npw)
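            # md: one contiguous block of rows per rank (used to build H and
            # S); md2: two-dimensional block-cyclic layout for ScaLAPACK.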

            with self.timer('Build H and S'):
                H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

            if scalapack:
                r = Redistributor(bd.comm, md, md2)
                H_GG = r.redistribute(H_GG)
                S_GG = r.redistribute(S_GG)

            psit_nG = md2.empty(dtype=complex)
            eps_n = np.empty(npw)

            with self.timer('Diagonalize'):
                if not scalapack:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n,
                                               iu=iu)
                else:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
            del H_GG, S_GG

            kpt.eps_n = eps_n[myslice].copy()

            if scalapack:
                md3 = BlacsDescriptor(bg, npw, npw, bd.mynbands, npw)
                r = Redistributor(bd.comm, md2, md3)
                psit_nG = r.redistribute(psit_nG)

            kpt.psit_nG = psit_nG[:bd.mynbands].copy()
            del psit_nG

            with self.timer('Projections'):
                self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

            kpt.f_n = None

        pb.finish()

        occupations.calculate(self)

        return nbands
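
The grid-shape heuristic above is easy to verify in isolation. A small illustrative helper (not GPAW API) implementing the same choice of the largest divisor not exceeding the square root:

def squarest_grid(size):
    # Largest divisor of size not exceeding sqrt(size), as in the
    # scalapack branch above; returns (nprow, npcol) with nprow <= npcol.
    nprow = int(round(size ** 0.5))
    while size % nprow != 0:
        nprow -= 1
    return nprow, size // nprow

assert squarest_grid(12) == (3, 4)
assert squarest_grid(7) == (1, 7)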
Example #26
0
from gpaw.mpi import world
from gpaw.blacs import BlacsGrid, Redistributor

if world.size < 2:
    raise ValueError('This example requires at least two processors.')

grid = BlacsGrid(world, 2, world.size // 2)

desc = grid.new_descriptor(12, 8, 2, 3)

a = desc.zeros()
a[:] = world.rank

subdesc = grid.new_descriptor(7, 7, 2, 2)
b = subdesc.zeros()

r = Redistributor(grid.comm, desc, subdesc, uplo='G')

ia = 3
ja = 2
ib = 1
jb = 1
M = 4
N = 5
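# Copy the M x N submatrix of a with upper-left corner (ia, ja) into b at
# (ib, jb); uplo='G' treats the matrices as general (full), not triangular.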

r.redistribute(a, b, M, N, ia, ja, ib, jb)

a0 = desc.collect_on_master(a)
b0 = subdesc.collect_on_master(b)
if world.rank == 0:
    print(a0)
Example #27
0
    def get_vchi(self, w_w=None, eta=0.1, q_c=[0.0, 0.0, 0.0],
                 direction=0, ac=1.0, readfile=None, optical=True,
                 write_eig=None):
        """Returns v * \chi where v is the bare Coulomb interaction"""

        self.get_bse_matrix(q_c=q_c, direction=direction, ac=ac,
                            readfile=readfile, optical=optical,
                            write_eig=write_eig)

        w_T = self.w_T
        rhoG0_S = self.rhoG0_S
        df_S = self.df_S

        print('Calculating response function at %s frequency points' %
              len(w_w), file=self.fd)
        vchi_w = np.zeros(len(w_w), dtype=complex)

        if not self.td:
            C_T = np.zeros(self.nS - len(self.excludef_S), complex)
            if world.rank == 0:
                A_T = np.dot(rhoG0_S, self.v_ST)
                B_T = np.dot(rhoG0_S * df_S, self.v_ST)
                tmp = np.dot(self.v_ST.conj().T, self.v_ST)
                overlap_tt = np.linalg.inv(tmp)
                C_T = np.dot(B_T.conj(), overlap_tt.T) * A_T
            world.broadcast(C_T, 0)
        else:
            A_t = np.dot(rhoG0_S, self.v_St)
            B_t = np.dot(rhoG0_S * df_S, self.v_St)
            if world.size == 1:
                C_T = B_t.conj() * A_t
            else:
                Nv = self.nv * (self.spinors + 1)
                Nc = self.nc * (self.spinors + 1)
                Ns = self.spins
                nS = self.nS
                ns = -(-self.kd.nbzkpts // world.size) * Nv * Nc * Ns
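                # ns: per-rank block length (ceiling of nbzkpts over ranks,
                # times Nv * Nc * Ns), so one block per rank covers all nS.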
                grid = BlacsGrid(world, world.size, 1)
                desc = grid.new_descriptor(nS, 1, ns, 1)
                C_t = desc.empty(dtype=complex)
                C_t[:, 0] = B_t.conj() * A_t
                C_T = desc.collect_on_master(C_t)[:, 0]
                if world.rank != 0:
                    C_T = np.empty(nS, dtype=complex)
                world.broadcast(C_T, 0)

        eta /= Hartree
        for iw, w in enumerate(w_w / Hartree):
            tmp_T = 1. / (w - w_T + 1j * eta)
            vchi_w[iw] += np.dot(tmp_T, C_T)
        vchi_w *= 4 * np.pi / self.vol

        if not np.allclose(self.q_c, 0.0):
            cell_cv = self.calc.wfs.gd.cell_cv
            B_cv = 2 * np.pi * np.linalg.inv(cell_cv).T
            q_v = np.dot(q_c, B_cv)
            vchi_w /= np.dot(q_v, q_v)

        """Check f-sum rule."""
        nv = self.calc.wfs.setups.nvalence
        dw_w = (w_w[1:] - w_w[:-1]) / Hartree
        wchi_w = (w_w[1:] * vchi_w[1:] + w_w[:-1] * vchi_w[:-1]) / Hartree / 2
        N = -np.dot(dw_w, wchi_w.imag) * self.vol / (2 * np.pi**2)
        print(file=self.fd)
        print('Checking f-sum rule:', file=self.fd)
        print('  Valence = %s, N = %f' % (nv, N), file=self.fd)
        print(file=self.fd)

        if write_eig is not None:
            if world.rank == 0:
                with open(write_eig, 'w') as f:
                    print('# %s eigenvalues in eV' % self.mode, file=f)
                    for iw, w in enumerate(self.w_T * Hartree):
                        print('%8d %12.6f %12.16f' %
                              (iw, w.real, C_T[iw].real), file=f)

        return vchi_w * ac
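
The frequency loop above is a plain sum of Lorentzian pole terms. An equivalent vectorized sketch (illustrative; the 4 * pi / vol and small-q Coulomb factors are applied afterwards, as in the code above):

import numpy as np

def vchi_from_poles(w_w, w_T, C_T, eta):
    # vchi(w) = sum_T C_T / (w - w_T + 1j * eta), evaluated for all
    # frequencies at once via broadcasting.
    return (C_T / (w_w[:, None] - w_T[None, :] + 1j * eta)).sum(axis=1)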