Exemple #1
0
def main(nbands=1000, mprocs=2, mb=64):
    # Set-up BlacsGrud
    grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    nndesc = grid.new_descriptor(nbands, nbands, mb, mb)
    H_nn = nndesc.empty(
        dtype=float)  # outside the BlacsGrid these are size zero
    C_nn = nndesc.empty(
        dtype=float)  # outside the BlacsGrid these are size zero
    eps_N = np.empty((nbands),
                     dtype=float)  # replicated array on all MPI tasks
    # Fill ScaLAPACK array
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(nndesc, H_nn, alpha, beta, uplo)
    scalapack_zero(nndesc, H_nn, switch_uplo[uplo])

    t1 = time()
    # either interface will work, we recommend use the latter interface
    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L')
    nndesc.diagonalize_dc(H_nn.copy(), C_nn, eps_N)
    t2 = time()
    world.broadcast(eps_N, 0)  # all MPI tasks now have eps_N
    world.barrier()  # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', t2 - t1)

    # Create replicated NumPy array
    diagonal = np.eye(nbands, dtype=float)
    offdiagonal = np.tril(np.ones((nbands, nbands)), -1)
    H0 = beta * diagonal + alpha * offdiagonal
    E0 = np.empty((nbands), dtype=float)

    t1 = time()
    diagonalize(H0, E0)
    t2 = time()

    if rank == 0:
        print('LAPACK diagonalize', t2 - t1)

    delta = abs(E0 - eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
def main(nbands=1000, mprocs=2, mb=64):
    # Set-up BlacsGrud
    grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    nndesc = grid.new_descriptor(nbands, nbands, mb, mb)
    H_nn = nndesc.empty(dtype=float) # outside the BlacsGrid these are size zero
    C_nn = nndesc.empty(dtype=float) # outside the BlacsGrid these are size zero
    eps_N  = np.empty((nbands), dtype=float) # replicated array on all MPI tasks 
    # Fill ScaLAPACK array
    alpha = 0.1 # off-diagonal
    beta = 75.0 # diagonal
    uplo = 'L' # lower-triangular
    scalapack_set(nndesc, H_nn, alpha, beta, uplo)
    scalapack_zero(nndesc, H_nn, switch_uplo[uplo])

    t1 = time()
    # either interface will work, we recommend use the latter interface
    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L')
    nndesc.diagonalize_dc(H_nn.copy(), C_nn, eps_N) 
    t2 = time()
    world.broadcast(eps_N, 0) # all MPI tasks now have eps_N
    world.barrier() # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', t2-t1)

    # Create replicated NumPy array    
    diagonal = np.eye(nbands,dtype=float)
    offdiagonal = np.tril(np.ones((nbands,nbands)), -1)
    H0 = beta*diagonal + alpha*offdiagonal
    E0 = np.empty((nbands), dtype=float)

    t1 = time()
    diagonalize(H0,E0)
    t2 = time()

    if rank == 0:
        print('LAPACK diagonalize', t2-t1)

    delta = abs(E0-eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
Exemple #3
0
def main(seed=42, dtype=float):
    ksl = BlacsBandLayouts(gd, bd, block_comm, dtype, mcpus, ncpus, blocksize)
    nbands = bd.nbands
    mynbands = bd.mynbands

    # Diagonalize
    # We would *not* create H_Nn in the real-space code this way.
    # This is just for testing purposes.
    # Note after MPI_Reduce, only meaningful information on gd masters
    H_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    U_nN = np.empty((mynbands, nbands), dtype=dtype)

    if ksl.Nndescriptor:  # hack
        scalapack_set(ksl.Nndescriptor, H_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    print("H_Nn")
    parallelprint(world, H_Nn)

    eps_n = np.zeros(bd.mynbands)
    blacs_diagonalize(ksl, H_Nn, U_nN, eps_n)
    print("U_nN")
    parallelprint(world, U_nN)
    print("eps_n")
    parallelprint(world, eps_n)

    # Inverse Cholesky
    S_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    C_nN = np.empty((mynbands, nbands), dtype=dtype)

    if ksl.Nndescriptor:  # hack
        scalapack_set(ksl.Nndescriptor, S_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    print("S_Nn")
    parallelprint(world, S_Nn)
    blacs_inverse_cholesky(ksl, S_Nn, C_nN)
    print("C_nN")
    parallelprint(world, C_nN)
Exemple #4
0
def main(seed=42, dtype=float):
    ksl = BlacsBandLayouts(gd, bd, dtype, mcpus, ncpus, blocksize)
    nbands = bd.nbands
    mynbands = bd.mynbands

    # Diagonalize
    # We would *not* create H_Nn in the real-space code this way.
    # This is just for testing purposes.
    # Note after MPI_Reduce, only meaningful information on gd masters
    H_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    U_nN = np.empty((mynbands, nbands), dtype=dtype)

    if ksl.Nndescriptor: # hack
        scalapack_set(ksl.Nndescriptor, H_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    print "H_Nn"
    parallelprint(world, H_Nn)
    
    eps_n = np.zeros(bd.mynbands)
    blacs_diagonalize(ksl, H_Nn, U_nN, eps_n)
    print "U_nN"
    parallelprint(world, U_nN)
    print "eps_n"
    parallelprint(world, eps_n)
    
    # Inverse Cholesky
    S_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    C_nN = np.empty((mynbands, nbands), dtype=dtype) 

    if ksl.Nndescriptor: # hack
        scalapack_set(ksl.Nndescriptor, S_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    print "S_Nn"
    parallelprint(world, S_Nn)
    blacs_inverse_cholesky(ksl, S_Nn, C_nN)
    print "C_nN"
    parallelprint(world, C_nN)
Exemple #5
0
    def test_trivial_cholesky(self):
        # Set up Hermitian overlap operator:
        S = lambda x: x
        dS = lambda a, P_ni: np.dot(P_ni, self.setups[a].dO_ii)
        nblocks = self.get_optimal_number_of_blocks(self.blocking)
        overlap = MatrixOperator(self.ksl, nblocks, self. async, True)
        S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S,
                                                 dS)

        # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
        I_nn = self.ksl.nndescriptor.empty(dtype=S_nn.dtype)
        scalapack_set(self.ksl.nndescriptor, I_nn, 0.0, 1.0, 'L')
        alpha = 1e-3  # shift eigenvalues away from zero

        C_nn = S_nn + alpha * I_nn
        self.ksl.nndescriptor.inverse_cholesky(C_nn, 'L')
        self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
        D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S,
                                                 dS)

        D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
        if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
            assert D_NN.shape == (self.bd.nbands, ) * 2
            D_NN = D_NN.T.copy()  # Fortran -> C indexing
            tri2full(D_NN, 'U')  # upper to lower..
        else:
            assert D_NN.nbytes == 0
            D_NN = np.empty((self.bd.nbands, ) * 2, dtype=D_NN.dtype)

        if self.bd.comm.rank == 0:
            self.gd.comm.broadcast(D_NN, 0)
        self.bd.comm.broadcast(D_NN, 0)

        # D_NN = C_NN^dag * S_NN * C_NN = I_NN - alpha * C_NN^dag * C_NN
        I_NN = np.eye(self.bd.nbands)
        C0_NN = np.linalg.inv(
            np.linalg.cholesky(self.S0_nn + alpha * I_NN).T.conj())
        D0_NN = I_NN - alpha * np.dot(C0_NN.T.conj(), C0_NN)
        self.check_and_plot(D_NN, D0_NN, 6, 'trivial,cholesky')  #XXX precision
Exemple #6
0
    def test_trivial_cholesky(self):
        # Set up Hermitian overlap operator:
        S = lambda x: x
        dS = lambda a, P_ni: np.dot(P_ni, self.setups[a].dO_ii)
        nblocks = self.get_optimal_number_of_blocks(self.blocking)
        overlap = MatrixOperator(self.ksl, nblocks, self.async, True)
        S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

        # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
        I_nn = self.ksl.nndescriptor.empty(dtype=S_nn.dtype)
        scalapack_set(self.ksl.nndescriptor, I_nn, 0.0, 1.0, 'L')
        alpha = 1e-3 # shift eigenvalues away from zero

        C_nn = S_nn + alpha * I_nn
        self.ksl.nndescriptor.inverse_cholesky(C_nn, 'L')
        self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
        D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

        D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
        if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
            assert D_NN.shape == (self.bd.nbands,) * 2
            D_NN = D_NN.T.copy() # Fortran -> C indexing
            tri2full(D_NN, 'U') # upper to lower..
        else:
            assert D_NN.nbytes == 0
            D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

        if self.bd.comm.rank == 0:
            self.gd.comm.broadcast(D_NN, 0)
        self.bd.comm.broadcast(D_NN, 0)

        # D_NN = C_NN^dag * S_NN * C_NN = I_NN - alpha * C_NN^dag * C_NN
        I_NN = np.eye(self.bd.nbands)
        C0_NN = np.linalg.inv(np.linalg.cholesky(self.S0_nn + alpha*I_NN).T.conj())
        D0_NN = I_NN - alpha * np.dot(C0_NN.T.conj(), C0_NN)
        self.check_and_plot(D_NN, D0_NN, 6, 'trivial,cholesky') #XXX precision
Exemple #7
0
    def linear_propagator(self, sourceC_nM, targetC_nM, S_MM, H_MM, dt):
        self.timer.start('Linear solve')

        if self.blacs:
            # XXX, Preallocate
            target_blockC_nm = self.Cnm_block_descriptor.empty(dtype=complex)
            temp_blockC_nm = self.Cnm_block_descriptor.empty(dtype=complex)
            temp_block_mm = self.mm_block_descriptor.empty(dtype=complex)
            if self.density.gd.comm.rank != 0:
                # XXX Fake blacks nbands, nao, nbands, nao grid because some
                # weird asserts
                # (these are 0,x or x,0 arrays)
                sourceC_nM = self.CnM_unique_descriptor.zeros(dtype=complex)

            # 1. target = (S+0.5j*H*dt) * source
            # Wave functions to target
            self.CnM2nm.redistribute(sourceC_nM, temp_blockC_nm)

            # XXX It can't be this f'n hard to symmetrize a matrix (tri2full)
            # Remove upper diagonal
            scalapack_zero(self.mm_block_descriptor, H_MM, 'U')
            # Lower diagonal matrix:
            temp_block_mm[:] = S_MM - (0.5j * dt) * H_MM
            scalapack_set(self.mm_block_descriptor, temp_block_mm, 0, 0, 'U')
            # Note it's strictly lower diagonal matrix
            # Add transpose of H
            pblas_tran(-0.5j * dt, H_MM, 1.0, temp_block_mm,
                       self.mm_block_descriptor, self.mm_block_descriptor)
            # Add transpose of S
            pblas_tran(1.0, S_MM, 1.0, temp_block_mm, self.mm_block_descriptor,
                       self.mm_block_descriptor)

            pblas_simple_gemm(self.Cnm_block_descriptor,
                              self.mm_block_descriptor,
                              self.Cnm_block_descriptor, temp_blockC_nm,
                              temp_block_mm, target_blockC_nm)
            # 2. target = (S-0.5j*H*dt)^-1 * target
            # temp_block_mm[:] = S_MM + (0.5j*dt) * H_MM
            # XXX It can't be this f'n hard to symmetrize a matrix (tri2full)
            # Lower diagonal matrix:
            temp_block_mm[:] = S_MM + (0.5j * dt) * H_MM
            # Not it's stricly lower diagonal matrix:
            scalapack_set(self.mm_block_descriptor, temp_block_mm, 0, 0, 'U')
            # Add transpose of H:
            pblas_tran(+0.5j * dt, H_MM, 1.0, temp_block_mm,
                       self.mm_block_descriptor, self.mm_block_descriptor)
            # Add transpose of S
            pblas_tran(1.0, S_MM, 1.0, temp_block_mm, self.mm_block_descriptor,
                       self.mm_block_descriptor)

            scalapack_solve(self.mm_block_descriptor,
                            self.Cnm_block_descriptor, temp_block_mm,
                            target_blockC_nm)

            if self.density.gd.comm.rank != 0:  # XXX is this correct?
                # XXX Fake blacks nbands, nao, nbands, nao grid because some
                # weird asserts
                # (these are 0,x or x,0 arrays)
                target = self.CnM_unique_descriptor.zeros(dtype=complex)
            else:
                target = targetC_nM
            self.Cnm2nM.redistribute(target_blockC_nm, target)
            self.density.gd.comm.broadcast(targetC_nM, 0)  # Is this required?
        else:
            # Note: The full equation is conjugated (therefore -+, not +-)
            targetC_nM[:] = \
                solve(S_MM - 0.5j * H_MM * dt,
                      np.dot(S_MM + 0.5j * H_MM * dt,
                             sourceC_nM.T.conjugate())).T.conjugate()

        self.timer.stop('Linear solve')
    def linear_propagator(self, sourceC_nM, targetC_nM, S_MM, H_MM, dt):
        self.timer.start('Linear solve')
        # XXX Debugging stuff. Remove
        if self.propagator_debug:
            if self.blacs:
                globalH_MM = self.blacs_mm_to_global(H_MM)
                globalS_MM = self.blacs_mm_to_global(S_MM)
                if world.rank == 0:
                    tri2full(globalS_MM, 'L')
                    tri2full(globalH_MM, 'L')
                    U_MM = dot(inv(globalS_MM-0.5j*globalH_MM*dt), globalS_MM+0.5j*globalH_MM*dt)
                    debugC_nM = dot(sourceC_nM, U_MM.T.conjugate())
                    #print 'PASS PROPAGATOR'
                    #debugC_nM = sourceC_nM.copy()
            else:
                if world.rank == 0:
                    U_MM = dot(inv(S_MM-0.5j*H_MM*dt), S_MM+0.5j*H_MM*dt)
                    debugC_nM = dot(sourceC_nM, U_MM.T.conjugate())
                #print 'PASS PROPAGATOR'
                #debugC_nM = sourceC_nM.copy()

        if self.blacs:
            target_blockC_nm = self.Cnm_block_descriptor.empty(dtype=complex) # XXX, Preallocate
            temp_blockC_nm = self.Cnm_block_descriptor.empty(dtype=complex) # XXX, Preallocate
            temp_block_mm = self.mm_block_descriptor.empty(dtype=complex)
            if self.density.gd.comm.rank != 0:
                # XXX Fake blacks nbands, nao, nbands, nao grid because some weird asserts
                # (these are 0,x or x,0 arrays)
                sourceC_nM = self.CnM_unique_descriptor.zeros(dtype=complex)

            # 1. target = (S+0.5j*H*dt) * source
            # Wave functions to target
            self.CnM2nm.redistribute(sourceC_nM, temp_blockC_nm)

            # XXX It can't be this f'n hard to symmetrize a matrix (tri2full)
            scalapack_zero(self.mm_block_descriptor, H_MM, 'U') # Remove upper diagonal
            temp_block_mm[:] = S_MM - (0.5j*dt) * H_MM  # Lower diagonal matrix
            scalapack_set(self.mm_block_descriptor, temp_block_mm, 0, 0, 'U')
            # Note it's stricly lower diagonal matrix
            pblas_tran(-0.5j*dt, H_MM, 1.0, temp_block_mm, self.mm_block_descriptor, self.mm_block_descriptor) # Add transpose of H
            pblas_tran(1.0, S_MM, 1.0, temp_block_mm, self.mm_block_descriptor, self.mm_block_descriptor) # Add transpose of S

            pblas_simple_gemm(self.Cnm_block_descriptor,
                              self.mm_block_descriptor,
                              self.Cnm_block_descriptor,
                              temp_blockC_nm,
                              temp_block_mm,
                              target_blockC_nm)
            # 2. target = (S-0.5j*H*dt)^-1 * target
            #temp_block_mm[:] = S_MM + (0.5j*dt) * H_MM
            # XXX It can't be this f'n hard to symmetrize a matrix (tri2full)
            temp_block_mm[:] = S_MM + (0.5j*dt) * H_MM  # Lower diagonal matrix
            scalapack_set(self.mm_block_descriptor, temp_block_mm, 0, 0, 'U') # Not it's stricly lower diagonal matrix           
            pblas_tran(+0.5j*dt, H_MM, 1.0, temp_block_mm, self.mm_block_descriptor, self.mm_block_descriptor) # Add transpose of H
            pblas_tran(1.0, S_MM, 1.0, temp_block_mm, self.mm_block_descriptor, self.mm_block_descriptor) # Add transpose of S

            scalapack_solve(self.mm_block_descriptor, 
                            self.Cnm_block_descriptor, 
                            temp_block_mm,
                            target_blockC_nm)

            if self.density.gd.comm.rank != 0: # XXX is this correct?
                # XXX Fake blacks nbands, nao, nbands, nao grid because some weird asserts
                # (these are 0,x or x,0 arrays)
                target = self.CnM_unique_descriptor.zeros(dtype=complex)
            else:
                target = targetC_nM
            self.Cnm2nM.redistribute(target_blockC_nm, target)
            self.density.gd.comm.broadcast(targetC_nM, 0) # Is this required?
        else:
            # Note: The full equation is conjugated (therefore -+, not +-)
            targetC_nM[:] = solve(S_MM-0.5j*H_MM*dt, np.dot(S_MM+0.5j*H_MM*dt, sourceC_nM.T.conjugate())).T.conjugate()
        
        # XXX Debugging stuff. Remove
        if self.propagator_debug:
            if world.rank == 0:
                verify(targetC_nM, debugC_nM,
                       'Linear solver propagator vs. reference')

        self.timer.stop('Linear solve')