def test_multiply_randomized(self):
    """Verify ``matrix_multiply`` with a random coefficient matrix.

    A random ``nbands x nbands`` matrix is drawn, synchronized from world
    rank 0 and applied to ``self.psit_nG`` (which is overwritten). The
    matrix elements of the transformed states are then compared with the
    reference ``C^dag S0 C`` built from ``self.S0_nn``.
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_nn = self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_nn = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_nn = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_nn = C_nn.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_nn, 2)
    # Every rank drew its own numbers; adopt the ones from world rank 0.
    world.broadcast(C_nn, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), True)
    self.psit_nG = overlap.matrix_multiply(C_nn.T.copy(), self.psit_nG,
                                           self.P_ani)
    # Transpose to get <psit_m|A|psit_n>
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(D_nn, 'U')  # upper to lower...

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_nn, 0)
    self.bd.comm.broadcast(D_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_nn = np.dot(C_nn.T.conj(), np.dot(S_nn, C_nn))
    self.check_and_plot(D_nn, D0_nn, 9, 'multiply,randomized')
def test_overlaps_nonhermitian(self):
    """Verify matrix elements of a non-Hermitian (scaled) overlap operator.

    The overlap operator is multiplied by a random scalar ``alpha`` that is
    synchronized from world rank 0. The distributed result is collected on
    the master, replicated to all ranks and compared with
    ``alpha * self.S0_nn``.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    S_NN = self.ksl.nndescriptor.collect_on_master(S_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert S_NN.shape == (self.bd.nbands,) * 2
        S_NN = S_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert S_NN.nbytes == 0
        S_NN = np.empty((self.bd.nbands,) * 2, dtype=S_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_NN, 0)
    self.bd.comm.broadcast(S_NN, 0)

    self.check_and_plot(S_NN, alpha * self.S0_nn, 9, 'overlaps,nonhermitian')
def run():
    """Orthonormalize ``psit_mG`` in place and assert the result.

    The overlap matrix is computed, ``inverse_cholesky`` is applied to it
    in place on world rank 0, the resulting matrix is broadcast and applied
    to ``psit_mG``. The overlap is then recomputed and asserted (on rank 0)
    to equal the identity after rounding to 7 decimals.
    """
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    t1 = time()
    if world.rank == 0:
        inverse_cholesky(S_nn)
        C_nn = S_nn
    else:
        # Receive buffer for the broadcast below.
        C_nn = np.empty((N, N))
    t2 = time()

    if world.rank == 0:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG[:] = matrix_multiply(C_nn, psit_mG, send_mG, recv_mG)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    # Assert below requires more memory.
    if world.rank == 0:
        # Fill in upper part:
        for n in range(N - 1):
            S_nn[n, n + 1:] = S_nn[n + 1:, n]
        assert (S_nn.round(7) == np.eye(N)).all()
def run():
    """Orthonormalize ``psit_mG`` in place and assert the result.

    The overlap matrix is computed, ``inverse_cholesky`` is applied to it
    in place on world rank 0, the resulting matrix is broadcast and applied
    to ``psit_mG``. The overlap is then recomputed and asserted (on rank 0)
    to equal the identity after rounding to 7 decimals.
    """
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    t1 = time()
    if world.rank == 0:
        inverse_cholesky(S_nn)
        C_nn = S_nn
    else:
        # Receive buffer for the broadcast below.
        C_nn = np.empty((N, N))
    t2 = time()

    if world.rank == 0:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG[:] = matrix_multiply(C_nn, psit_mG, send_mG, recv_mG)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    # Assert below requires more memory.
    if world.rank == 0:
        # Fill in upper part:
        for n in range(N - 1):
            S_nn[n, n + 1:] = S_nn[n + 1:, n]
        assert (S_nn.round(7) == np.eye(N)).all()
def run():
    """Orthonormalize ``psit_mG`` in place using NumPy's Cholesky routines.

    World rank 0 inverts the Cholesky factor of the overlap matrix, the
    result is broadcast and applied to ``psit_mG``. The overlap is then
    recomputed and asserted (on rank 0) to equal the identity after
    rounding to 7 decimals.
    """
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    t1 = time()
    if world.rank == 0:
        C_nn = np.linalg.inv(np.linalg.cholesky(S_nn)).copy()
    else:
        # Receive buffer for the broadcast below.
        C_nn = np.empty((N, N))
    t2 = time()

    if world.rank == 0:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG[:] = matrix_multiply(C_nn, psit_mG, send_mG, recv_mG)

    # Check:
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    if world.rank == 0:
        # Fill in upper part:
        for n in range(N - 1):
            S_nn[n, n + 1:] = S_nn[n + 1:, n]
        assert (S_nn.round(7) == np.eye(N)).all()
def update_references(self, kpt_u, rank_a):
    """Fill ``self.chk_una`` with projector checksums and replicate it.

    For every (k-point ``u``, band ``n``, atom ``a``) the rank that owns
    the entry computes ``md5_array`` of ``kpt.P_ani[a][myn]``. World rank 0
    either stores its own value directly or posts a non-blocking receive
    from the owner; owners other than rank 0 post a non-blocking send.
    After all requests complete, the full table is broadcast from rank 0.

    ``rank_a[a]`` is the domain-communicator rank holding atom ``a``.
    """
    pending = []
    kcomm = self.kd_old.comm
    bcomm = self.bd.comm
    dcomm = self.gd.comm

    for u in range(self.kd_old.nks):
        kpt_rank, myu = self.kd_old.who_has(u)
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            for a in range(self.natoms):
                tag = 1303 + a
                domain_rank = rank_a[a]
                is_owner = (kcomm.rank == kpt_rank and
                            bcomm.rank == band_rank and
                            dcomm.rank == domain_rank)
                if is_owner:
                    checksum = md5_array(kpt_u[myu].P_ani[a][myn],
                                         numeric=True)
                    if world.rank == 0:
                        self.chk_una[u, n, a] = checksum
                    else:
                        payload = np.array([checksum], dtype=np.int64)
                        pending.append(
                            world.send(payload, 0, tag, block=False))
                elif world.rank == 0:
                    # World rank of the owner: domain index varies fastest,
                    # then band, then k-point.
                    source = rank_a[a] + \
                        band_rank * dcomm.size + \
                        kpt_rank * dcomm.size * bcomm.size
                    # XXX hack: a length-1 slice is a writable view into
                    # chk_una, so the receive lands in the table itself.
                    target = self.chk_una[u, n, a:a + 1]
                    pending.append(
                        world.receive(target, source, tag, block=False))

    world.waitall(pending)
    world.broadcast(self.chk_una, 0)
def update_references(self, kpt_u, rank_a):
    """Fill ``self.chk_una`` with projector checksums and replicate it.

    For every (k-point ``u``, band ``n``, atom ``a``) the rank that owns
    the entry computes ``md5_array`` of ``kpt.P_ani[a][myn]``. World rank 0
    either stores its own value directly or posts a non-blocking receive
    from the owner; owners other than rank 0 post a non-blocking send.
    After all requests complete, the full table is broadcast from rank 0.

    ``rank_a[a]`` is the domain-communicator rank holding atom ``a``.
    """
    pending = []
    kcomm = self.kd_old.comm
    bcomm = self.bd.comm
    dcomm = self.gd.comm

    for u in range(self.kd_old.nks):
        kpt_rank, myu = self.kd_old.who_has(u)
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            for a in range(self.natoms):
                tag = 1303 + a
                domain_rank = rank_a[a]
                is_owner = (kcomm.rank == kpt_rank and
                            bcomm.rank == band_rank and
                            dcomm.rank == domain_rank)
                if is_owner:
                    checksum = md5_array(kpt_u[myu].P_ani[a][myn],
                                         numeric=True)
                    if world.rank == 0:
                        self.chk_una[u, n, a] = checksum
                    else:
                        payload = np.array([checksum], dtype=np.int64)
                        pending.append(
                            world.send(payload, 0, tag, block=False))
                elif world.rank == 0:
                    # World rank of the owner: domain index varies fastest,
                    # then band, then k-point.
                    source = rank_a[a] + \
                        band_rank * dcomm.size + \
                        kpt_rank * dcomm.size * bcomm.size
                    # XXX hack: a length-1 slice is a writable view into
                    # chk_una, so the receive lands in the table itself.
                    target = self.chk_una[u, n, a:a + 1]
                    pending.append(
                        world.receive(target, source, tag, block=False))

    world.waitall(pending)
    world.broadcast(self.chk_una, 0)
def test_multiply_randomized(self):
    """Verify ``matrix_multiply`` with a random coefficient matrix.

    A random ``nbands x nbands`` matrix is drawn, synchronized from world
    rank 0 and applied to ``self.psit_nG`` (which is overwritten). The
    matrix elements of the transformed states are then compared with the
    reference ``C^dag S0 C`` built from ``self.S0_nn``.
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_nn = self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_nn = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_nn = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_nn = C_nn.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_nn, 2)
    # Every rank drew its own numbers; adopt the ones from world rank 0.
    world.broadcast(C_nn, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), True)
    self.psit_nG = overlap.matrix_multiply(C_nn.T.copy(), self.psit_nG,
                                           self.P_ani)
    # Transpose to get <psit_m|A|psit_n>
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()
    tri2full(D_nn, 'U')  # upper to lower...

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_nn, 0)
    self.bd.comm.broadcast(D_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_nn = np.dot(C_nn.T.conj(), np.dot(S_nn, C_nn))
    self.check_and_plot(D_nn, D0_nn, 9, 'multiply,randomized')
def test_overlaps_nonhermitian(self):
    """Verify matrix elements of a non-Hermitian (scaled) overlap operator.

    The overlap operator is multiplied by a random scalar ``alpha`` that is
    synchronized from world rank 0. The distributed result is collected on
    the master, replicated to all ranks and compared with
    ``alpha * self.S0_nn``.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    S_NN = self.ksl.nndescriptor.collect_on_master(S_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert S_NN.shape == (self.bd.nbands,) * 2
        S_NN = S_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert S_NN.nbytes == 0
        S_NN = np.empty((self.bd.nbands,) * 2, dtype=S_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_NN, 0)
    self.bd.comm.broadcast(S_NN, 0)

    self.check_and_plot(S_NN, alpha * self.S0_nn, 9, 'overlaps,nonhermitian')
def run():
    """Orthonormalize ``psit_mG`` in place using NumPy's Cholesky routines.

    World rank 0 inverts the Cholesky factor of the overlap matrix, the
    result is broadcast and applied to ``psit_mG``. The overlap is then
    recomputed and asserted (on rank 0) to equal the identity after
    rounding to 7 decimals.
    """
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    t1 = time()
    if world.rank == 0:
        C_nn = np.linalg.inv(np.linalg.cholesky(S_nn)).copy()
    else:
        # Receive buffer for the broadcast below.
        C_nn = np.empty((N, N))
    t2 = time()

    if world.rank == 0:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG[:] = matrix_multiply(C_nn, psit_mG, send_mG, recv_mG)

    # Check:
    S_nn = overlap(psit_mG, send_mG, recv_mG)

    if world.rank == 0:
        # Fill in upper part:
        for n in range(N - 1):
            S_nn[n, n + 1:] = S_nn[n + 1:, n]
        assert (S_nn.round(7) == np.eye(N)).all()
def get_random_number():
    """Return one random number in [0, 1) that is identical on all ranks.

    Only the ``MASTER`` rank draws the number; the other ranks allocate a
    one-element buffer and receive the value through a broadcast.
    """
    is_master = world.rank == MASTER
    buf = np.random.rand(1) if is_master else np.empty(1)
    world.broadcast(buf, MASTER)
    return buf[0]
def diagonalize(self):
    """Diagonalize the Hamiltonian and store eigenvalues in ``self.w_T``.

    The t and T represent local and global eigenstates indices
    respectively.

    Without ``self.td`` the matrix is collected, the rows and columns listed
    in ``self.excludef_S`` are removed and ``numpy.linalg.eig`` is run on
    world rank 0; only the eigenvalues are broadcast, so ``self.v_ST``
    exists on rank 0 only. ``self.df_S`` and ``self.rhoG0_S`` are pruned
    accordingly.

    With ``self.td`` the eigenvectors are stored in ``self.v_St``, using
    the serial LAPACK wrapper on one process and ScaLAPACK otherwise. If
    ``self.write_v`` is also set they are written with ``par_save``.
    """
    print('Diagonalizing Hamiltonian', file=self.fd)

    # Non-Hermitian matrix can only use linalg.eig
    if not self.td:
        print(' Using numpy.linalg.eig...', file=self.fd)
        print(' Eliminated %s pair orbitals' % len(self.excludef_S),
              file=self.fd)

        self.H_SS = self.collect_A_SS(self.H_sS)
        # Allocated on every rank so the broadcast below has a buffer.
        self.w_T = np.zeros(self.nS - len(self.excludef_S), complex)
        if world.rank == 0:
            self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=0)
            self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=1)
            self.w_T, self.v_ST = np.linalg.eig(self.H_SS)
        world.broadcast(self.w_T, 0)
        self.df_S = np.delete(self.df_S, self.excludef_S)
        self.rhoG0_S = np.delete(self.rhoG0_S, self.excludef_S)
    # Here the eigenvectors are returned as complex conjugated rows
    else:
        if world.size == 1:
            print(' Using lapack...', file=self.fd)
            from gpaw.utilities.lapack import diagonalize
            self.w_T = np.zeros(self.nS)
            diagonalize(self.H_sS, self.w_T)
            self.v_St = self.H_sS.conj().T
        else:
            print(' Using scalapack...', file=self.fd)
            nS = self.nS
            # -(-a // b) is ceil(a / b): k-points per rank, rounded up.
            ns = -(-self.kd.nbzkpts // world.size) * (
                self.nv * self.nc * self.spins * (self.spinors + 1)**2)
            grid = BlacsGrid(world, world.size, 1)
            desc = grid.new_descriptor(nS, nS, ns, nS)

            # Redistribute into 2x2 blocks for the diagonalization.
            desc2 = grid.new_descriptor(nS, nS, 2, 2)
            H_tmp = desc2.zeros(dtype=complex)
            r = Redistributor(world, desc, desc2)
            r.redistribute(self.H_sS, H_tmp)

            self.w_T = np.empty(nS)
            v_tmp = desc2.empty(dtype=complex)
            desc2.diagonalize_dc(H_tmp, v_tmp, self.w_T)

            # Move the eigenvectors back to the original layout.
            r = Redistributor(grid.comm, desc2, desc)
            self.v_St = desc.zeros(dtype=complex)
            r.redistribute(v_tmp, self.v_St)
            self.v_St = self.v_St.conj().T

    if self.write_v and self.td:
        # Cannot use par_save without td
        self.par_save('v_TS.ulm', 'v_TS', self.v_St.T)
def test_multiply_nonhermitian(self):
    """Verify ``matrix_multiply`` for a non-Hermitian (scaled) operator.

    A random scalar ``alpha`` and a random coefficient matrix ``C_NN`` are
    synchronized from world rank 0. ``C_NN`` is distributed from the master
    and applied to ``self.psit_nG`` (which is overwritten); the matrix
    elements of ``alpha * S`` are then collected, replicated and compared
    with ``C^dag (alpha S0) C``.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = alpha * self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_NN = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_NN = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,nonhermitian')
def test_multiply_nonhermitian(self):
    """Verify ``matrix_multiply`` for a non-Hermitian (scaled) operator.

    A random scalar ``alpha`` and a random coefficient matrix ``C_NN`` are
    synchronized from world rank 0. ``C_NN`` is distributed from the master
    and applied to ``self.psit_nG`` (which is overwritten); the matrix
    elements of ``alpha * S`` are then collected, replicated and compared
    with ``C^dag (alpha S0) C``.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = alpha * self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_NN = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_NN = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), False)
    if 0:  # XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
        blockcomm = self.ksl.nndescriptor.blacsgrid.comm
        self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                       self.ksl.nndescriptor)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,nonhermitian')
def setUp(self):
    """Prepare the fixture and choose the initial atom-to-rank layout.

    Subclasses must set ``self.distribution`` to one of ``'master'``,
    ``'domains'`` or ``'balanced'``; the matching rank array is stored in
    ``self.rank0_a``.
    """
    # "Virtual" attributes that a concrete subclass has to provide.
    for virtvar in ('distribution',):
        assert getattr(self, virtvar) is not None, 'Virtual "%s"!' % virtvar

    UTDomainParallelSetup.setUp(self)

    # Initial layout
    ndomains = self.gd.comm.size
    random_a = np.random.uniform(0, ndomains, size=self.natoms).astype(int)
    # Make the random assignment identical on all ranks.
    world.broadcast(random_a, 0)

    spos_ac = self.atoms.get_scaled_positions() % 1.0
    # All three candidates are built (in this order) before one is picked.
    master_a = np.zeros(self.natoms, dtype=int)
    domains_a = self.gd.get_ranks_from_positions(spos_ac)
    layouts = {'master': master_a,
               'domains': domains_a,
               'balanced': random_a}
    self.rank0_a = layouts[self.distribution]
def test_multiply_randomized(self):
    """Verify ``matrix_multiply`` with a random coefficient matrix.

    A random ``nbands x nbands`` matrix ``C_NN`` is synchronized from world
    rank 0, distributed from the master and applied to ``self.psit_nG``
    (which is overwritten). The matrix elements are then collected on the
    master, symmetrized with ``tri2full``, replicated and compared with the
    reference ``C^dag S0 C`` built from ``self.S0_nn``.
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_NN = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_NN = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), True)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower...
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,randomized')
def gatherv(m, N=None):
    """Gather per-rank pieces into one complex array replicated on all ranks.

    Parameters:
        m: local piece. A 2-D array of shape (n, N) contributes ``n`` rows
            of an (N, N) result; a 1-D array of length ``n`` contributes
            ``n`` entries of a length-``N`` result. The receive buffers are
            complex, so ``m`` is presumably expected to be complex as well
            -- TODO confirm against callers.
        N: global length. Required for 1-D input; for 2-D input it is
            taken from ``m.shape[1]`` and the argument is ignored.

    Returns:
        ``m`` itself when running on a single process, otherwise the
        assembled array (pieces ordered by rank), identical on every rank.

    Raises:
        ValueError: 1-D input without ``N``.
        NotImplementedError: input that is neither 1-D nor 2-D.
    """
    from gpaw.mpi import world, size, rank

    if world.size == 1:
        return m

    ndim = m.ndim
    if ndim == 2:
        n, N = m.shape
        assert n < N
        M = np.zeros((N, N), dtype=complex)
    elif ndim == 1:
        if N is None:
            raise ValueError('N must be given when gathering a 1-D array')
        n = m.shape[0]
        M = np.zeros(N, dtype=complex)
    else:
        # Previously aborted via an undefined name (NameError).
        raise NotImplementedError('Not Implemented')

    # Piece length contributed by each rank.
    n_index = np.zeros(size, dtype=int)
    world.all_gather(np.array([n]), n_index)

    root = 0
    if rank != root:
        world.ssend(m, root, 112 + rank)
    else:
        for irank, n in enumerate(n_index):
            if irank == root:
                # The root's own piece goes first.
                M[:n_index[0]] = m
            else:
                n_start = n_index[0:irank].sum()
                n_end = n_index[0:irank + 1].sum()
                if ndim == 2:
                    tmp_nN = np.zeros((n, N), dtype=complex)
                    world.receive(tmp_nN, irank, 112 + irank)
                    M[n_start:n_end, :] = tmp_nN
                else:
                    tmp_n = np.zeros(n, dtype=complex)
                    world.receive(tmp_n, irank, 112 + irank)
                    M[n_start:n_end] = tmp_n

    world.broadcast(M, root)

    return M
def test_multiply_randomized(self):
    """Verify ``matrix_multiply`` with a random coefficient matrix.

    A random ``nbands x nbands`` matrix ``C_NN`` is synchronized from world
    rank 0, distributed from the master and applied to ``self.psit_nG``
    (which is overwritten). The matrix elements are then collected on the
    master, symmetrized with ``tri2full``, replicated and compared with the
    reference ``C^dag S0 C`` built from ``self.S0_nn``.
    """
    # Known starting point of S_nn = <psit_m|S|psit_n>
    S_NN = self.S0_nn

    if self.dtype == complex:
        # Random modulus times random phase.
        C_NN = np.random.uniform(size=self.nbands**2) * \
            np.exp(1j * np.random.uniform(0, 2 * np.pi, size=self.nbands**2))
    else:
        C_NN = np.random.normal(size=self.nbands**2)
    # Scale by the 2-norm of the flat vector while shaping it into a matrix.
    C_NN = C_NN.reshape((self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
    world.broadcast(C_NN, 0)

    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), True)

    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert C_NN.shape == (self.bd.nbands,) * 2
        tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
    else:
        tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
    C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    if memstats:
        self.mem_test = record_memory()

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower...
    else:
        # Non-master ranks get an empty array back; allocate a receive
        # buffer of the full size for the broadcasts below.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_nn = C_nn^dag * S_nn * C_nn
    D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
    self.check_and_plot(D_NN, D0_NN, 9, 'multiply,randomized')
def main(nbands=1000, mprocs=2, mb=64):
    """Compare ScaLAPACK and serial LAPACK eigenvalues of a test matrix.

    The matrix has ``beta`` on the diagonal and ``alpha`` below it. It is
    diagonalized once through a ``mprocs x mprocs`` BLACS grid with block
    size ``mb`` and once with the serial ``diagonalize``; rank 0 prints the
    timings and the largest eigenvalue difference and asserts that it is
    below ``tol``.
    """
    # Set-up BlacsGrid
    blacs_grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    descriptor = blacs_grid.new_descriptor(nbands, nbands, mb, mb)
    # outside the BlacsGrid these are size zero
    H_nn = descriptor.empty(dtype=float)
    C_nn = descriptor.empty(dtype=float)
    # replicated array on all MPI tasks
    eps_N = np.empty(nbands, dtype=float)

    # Fill ScaLAPACK array
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(descriptor, H_nn, alpha, beta, uplo)
    scalapack_zero(descriptor, H_nn, switch_uplo[uplo])

    start = time()
    # either interface will work, we recommend use the latter interface
    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L')
    descriptor.diagonalize_dc(H_nn.copy(), C_nn, eps_N)
    stop = time()
    world.broadcast(eps_N, 0)  # all MPI tasks now have eps_N
    world.barrier()  # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', stop - start)

    # Create replicated NumPy array
    diagonal = np.eye(nbands, dtype=float)
    offdiagonal = np.tril(np.ones((nbands, nbands)), -1)
    H0 = beta * diagonal + alpha * offdiagonal
    E0 = np.empty(nbands, dtype=float)

    start = time()
    diagonalize(H0, E0)
    stop = time()

    if rank == 0:
        print('LAPACK diagonalize', stop - start)

    delta = abs(E0 - eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
def run(psit_mG):
    """Rotate ``psit_mG`` into the eigenbasis of a test Hamiltonian.

    The Hamiltonian matrix is computed, diagonalized on world rank 0, and
    the broadcast eigenvector matrix is applied to the wave functions. The
    rotated Hamiltonian is then asserted (on rank 0) to carry the
    eigenvalues on its diagonal and zeros below it. Returns the rotated
    wave functions.
    """
    operator = MatrixOperator(ksl, J)

    def H(psit_xG):
        # Kinetic term written by kin(), then the local potential is added
        # band by band, in place.
        out_xG = np.empty_like(psit_xG)
        kin(psit_xG, out_xG)
        for src_G, dst_G in zip(psit_xG, out_xG):
            dst_G += vt_G * src_G
        return out_xG

    # Constant atomic corrections for the two atoms (indices 0 and 1).
    dH_aii = {0: np.ones((2, 2)) * 0.123, 1: np.ones((3, 3)) * 0.321}

    def dH(a, P_ni):
        return np.dot(P_ni, dH_aii[a])

    H_nn = operator.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    start = time()
    on_master = world.rank == 0
    if on_master:
        eps_n, vectors = np.linalg.eigh(H_nn)
        H_nn = np.ascontiguousarray(vectors.T)
    stop = time()

    if on_master:
        print('Diagonalization Time %f' % (stop - start))
        print(eps_n)

    # Distribute matrix:
    world.broadcast(H_nn, 0)

    psit_mG = operator.matrix_multiply(H_nn, psit_mG, P_ani)

    if on_master:
        print('Made it past matrix multiply')

    # Check:
    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    H_nn = operator.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    if on_master:
        for n in range(N):
            assert abs(H_nn[n, n] - eps_n[n]) < 2e-8
            assert not H_nn[n + 1:, n].round(8).any()

    return psit_mG
def setUp(self):
    """Prepare the fixture and choose the initial atom-to-rank layout.

    Subclasses must set ``self.distribution`` to one of ``'master'``,
    ``'domains'`` or ``'balanced'``; the matching rank array is stored in
    ``self.rank0_a``.
    """
    # "Virtual" attributes that a concrete subclass has to provide.
    for virtvar in ('distribution',):
        assert getattr(self, virtvar) is not None, 'Virtual "%s"!' % virtvar

    UTDomainParallelSetup.setUp(self)

    # Initial layout
    ndomains = self.gd.comm.size
    random_a = np.random.uniform(0, ndomains, size=self.natoms).astype(int)
    # Make the random assignment identical on all ranks.
    world.broadcast(random_a, 0)

    spos_ac = self.atoms.get_scaled_positions() % 1.0
    # All three candidates are built (in this order) before one is picked.
    master_a = np.zeros(self.natoms, dtype=int)
    domains_a = self.gd.get_ranks_from_positions(spos_ac)
    layouts = {'master': master_a,
               'domains': domains_a,
               'balanced': random_a}
    self.rank0_a = layouts[self.distribution]
def test_addition_theorem(self):
    """Check the complex spherical harmonic addition theorem up to l = 9.

    Random second directions are drawn and synchronized from world rank 0.
    The sum over ``m`` of ``Y_lm(dir1) * conj(Y_lm(dir2))``, scaled by
    ``4 pi / (2l + 1)``, is accumulated across ranks and compared with
    ``legendre(l, 0, cos(angle))``.
    """
    lmax = 9

    # Test that the complex spherical harmonic addition theorem holds
    thetam_L = np.random.uniform(0, np.pi, size=theta_L.shape)
    world.broadcast(thetam_L, 0)
    phim_L = np.random.uniform(0, 2 * np.pi, size=phi_L.shape)
    world.broadcast(phim_L, 0)

    # Cosine of the angle between the two directions.
    cosv_L = np.cos(theta_L) * np.cos(thetam_L) \
        + np.sin(theta_L) * np.sin(thetam_L) * np.cos(phi_L - phim_L)

    reference_lL = np.array([legendre(l, 0, cosv_L)
                             for l in range(lmax + 1)])
    summed_lL = np.zeros_like(reference_lL)
    # lmiter is handed the communicator; each rank adds its own terms and
    # world.sum() combines them.
    for l, m in lmiter(lmax, comm=world):
        summed_lL[l] += 4 * np.pi / (2 * l + 1.) * Y(l, m, theta_L, phi_L) \
            * Y(l, m, thetam_L, phim_L).conj()
    world.sum(summed_lL)

    worst = np.abs(summed_lL - reference_lL).max()
    self.assertAlmostEqual(worst, 0, 6)
def run(psit_mG):
    """Orthonormalize ``psit_mG`` with respect to a test overlap operator.

    The overlap matrix is computed, ``inverse_cholesky`` is applied to it
    in place on world rank 0, and the broadcast matrix is applied to the
    wave functions. The recomputed overlap is asserted (on rank 0) to have
    a unit diagonal and zeros below it. Returns the new wave functions.
    """
    operator = MatrixOperator(ksl, K)
    if 0:
        operator.work1_xG = work1_xG
        operator.work2_xG = work2_xG

    # S_nn = np.empty((N, N))

    def S(x):
        return x

    # Constant atomic corrections for the two atoms (indices 0 and 1).
    dS_aii = {0: np.ones((2, 2)) * 0.123, 1: np.ones((3, 3)) * 0.321}

    def dS(a, P_ni):
        return np.dot(P_ni, dS_aii[a])

    S_nn = operator.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    start = time()
    on_master = world.rank == 0
    if on_master:
        print(S_nn.round(5))
        inverse_cholesky(S_nn)
    # Same array object on every rank; only the master's content matters
    # before the broadcast below.
    C_nn = S_nn
    stop = time()

    if on_master:
        print('Cholesky Time %f' % (stop - start))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG = operator.matrix_multiply(C_nn, psit_mG, P_ani)

    if on_master:
        print('Made it past matrix multiply')

    # Check:
    S_nn = operator.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    if on_master:
        for n in range(N):
            assert abs(S_nn[n, n] - 1.0) < 1e-10
            assert not S_nn[n + 1:, n].round(10).any()

    return psit_mG
def test_addition_theorem(self):
    """Check the complex spherical harmonic addition theorem up to l = 9.

    Random second directions are drawn and synchronized from world rank 0.
    The sum over ``m`` of ``Y_lm(dir1) * conj(Y_lm(dir2))``, scaled by
    ``4 pi / (2l + 1)``, is accumulated across ranks and compared with
    ``legendre(l, 0, cos(angle))``.
    """
    lmax = 9

    # Test that the complex spherical harmonic addition theorem holds
    thetam_L = np.random.uniform(0, np.pi, size=theta_L.shape)
    world.broadcast(thetam_L, 0)
    phim_L = np.random.uniform(0, 2 * np.pi, size=phi_L.shape)
    world.broadcast(phim_L, 0)

    # Cosine of the angle between the two directions.
    cosv_L = np.cos(theta_L) * np.cos(thetam_L) \
        + np.sin(theta_L) * np.sin(thetam_L) * np.cos(phi_L - phim_L)

    reference_lL = np.array([legendre(l, 0, cosv_L)
                             for l in range(lmax + 1)])
    summed_lL = np.zeros_like(reference_lL)
    # lmiter is handed the communicator; each rank adds its own terms and
    # world.sum() combines them.
    for l, m in lmiter(lmax, comm=world):
        summed_lL[l] += 4 * np.pi / (2 * l + 1.) * Y(l, m, theta_L, phi_L) \
            * Y(l, m, thetam_L, phim_L).conj()
    world.sum(summed_lL)

    worst = np.abs(summed_lL - reference_lL).max()
    self.assertAlmostEqual(worst, 0, 6)
def gatherv(m, N=None):
    """Gather per-rank pieces into one complex array replicated on all ranks.

    Parameters:
        m: local piece. A 2-D array of shape (n, N) contributes ``n`` rows
            of an (N, N) result; a 1-D array of length ``n`` contributes
            ``n`` entries of a length-``N`` result. The receive buffers are
            complex, so ``m`` is presumably expected to be complex as well
            -- TODO confirm against callers.
        N: global length. Required for 1-D input; for 2-D input it is
            taken from ``m.shape[1]`` and the argument is ignored.

    Returns:
        ``m`` itself when running on a single process, otherwise the
        assembled array (pieces ordered by rank), identical on every rank.

    Raises:
        ValueError: 1-D input without ``N``.
        NotImplementedError: input that is neither 1-D nor 2-D.
    """
    if world.size == 1:
        return m

    ndim = m.ndim
    if ndim == 2:
        n, N = m.shape
        assert n < N
        M = np.zeros((N, N), dtype=complex)
    elif ndim == 1:
        if N is None:
            raise ValueError('N must be given when gathering a 1-D array')
        n = m.shape[0]
        M = np.zeros(N, dtype=complex)
    else:
        # Previously aborted via an undefined name (NameError).
        raise NotImplementedError('Not Implemented')

    # Piece length contributed by each rank.
    n_index = np.zeros(size, dtype=int)
    world.all_gather(np.array([n]), n_index)

    root = 0
    if rank != root:
        world.ssend(m, root, 112 + rank)
    else:
        for irank, n in enumerate(n_index):
            if irank == root:
                # The root's own piece goes first.
                M[:n_index[0]] = m
            else:
                n_start = n_index[0:irank].sum()
                n_end = n_index[0:irank + 1].sum()
                if ndim == 2:
                    tmp_nN = np.zeros((n, N), dtype=complex)
                    world.receive(tmp_nN, irank, 112 + irank)
                    M[n_start:n_end, :] = tmp_nN
                else:
                    tmp_n = np.zeros(n, dtype=complex)
                    world.receive(tmp_n, irank, 112 + irank)
                    M[n_start:n_end] = tmp_n

    world.broadcast(M, root)

    return M
def main(nbands=1000, mprocs=2, mb=64):
    """Compare ScaLAPACK and serial LAPACK eigenvalues of a test matrix.

    The matrix has ``beta`` on the diagonal and ``alpha`` below it. It is
    diagonalized once through a ``mprocs x mprocs`` BLACS grid with block
    size ``mb`` and once with the serial ``diagonalize``; rank 0 prints the
    timings and the largest eigenvalue difference and asserts that it is
    below ``tol``.
    """
    # Set-up BlacsGrid
    blacs_grid = BlacsGrid(world, mprocs, mprocs)

    # Create descriptor
    descriptor = blacs_grid.new_descriptor(nbands, nbands, mb, mb)
    # outside the BlacsGrid these are size zero
    H_nn = descriptor.empty(dtype=float)
    C_nn = descriptor.empty(dtype=float)
    # replicated array on all MPI tasks
    eps_N = np.empty(nbands, dtype=float)

    # Fill ScaLAPACK array
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(descriptor, H_nn, alpha, beta, uplo)
    scalapack_zero(descriptor, H_nn, switch_uplo[uplo])

    start = time()
    # either interface will work, we recommend use the latter interface
    # scalapack_diagonalize_dc(nndesc, H_nn.copy(), C_nn, eps_N, 'L')
    descriptor.diagonalize_dc(H_nn.copy(), C_nn, eps_N)
    stop = time()
    world.broadcast(eps_N, 0)  # all MPI tasks now have eps_N
    world.barrier()  # wait for everyone to finish

    if rank == 0:
        print('ScaLAPACK diagonalize_dc', stop - start)

    # Create replicated NumPy array
    diagonal = np.eye(nbands, dtype=float)
    offdiagonal = np.tril(np.ones((nbands, nbands)), -1)
    H0 = beta * diagonal + alpha * offdiagonal
    E0 = np.empty(nbands, dtype=float)

    start = time()
    diagonalize(H0, E0)
    stop = time()

    if rank == 0:
        print('LAPACK diagonalize', stop - start)

    delta = abs(E0 - eps_N).max()
    if rank == 0:
        print(delta)
        assert delta < tol
def scal_diagonalize(A, nodes='master'):
    """Diagonalize the Hermitian matrix A (size N*N) with ScaLAPACK.

    Usage: eps, B = scal_diagonalize(A)

    Parameters:
        A: square Hermitian array; must be passed on every rank because
            the shape and symmetry checks run everywhere. Only the copy on
            rank 0 is fed into the redistribution.
        nodes: 'master' -- eigenvectors only available on master node;
            'all' -- eigenvectors broadcast to all nodes.

    Returns:
        ``(eps, B)``: the eigenvalues and eigenvectors.

    Raises:
        ValueError: ``nodes`` is neither 'master' nor 'all'.
    """
    if nodes not in ('master', 'all'):
        # Previously an unknown value fell off the end and returned None.
        raise ValueError("nodes must be 'master' or 'all', got %r" % (nodes,))

    # make sure A is N*N, and hermitian
    N = A.shape[0]
    assert A.shape[0] == A.shape[1]
    # Exact element-wise test, equivalent to comparing every (i, j) pair.
    assert (A == A.conj().T).all()

    # create blacs descriptor
    mb = 64
    g = BlacsGrid(world, 2, size // 2)
    nndesc1 = g.new_descriptor(N, N, N, N)
    nndesc2 = g.new_descriptor(N, N, mb, mb)

    # distribute A to blacs grid A_
    if rank != 0:
        A = nndesc1.zeros(dtype=A.dtype)
    A_ = nndesc2.empty(dtype=A.dtype)
    redistributor = Redistributor(world, nndesc1, nndesc2)
    redistributor.redistribute(A, A_)

    # diagonalize
    B_ = nndesc2.zeros(dtype=A.dtype)
    eps = np.zeros(N, dtype=A.dtype)
    nndesc2.diagonalize_dc(A_, B_, eps, 'L')

    # distribute the eigenvectors to master
    B = np.zeros_like(A)
    redistributor = Redistributor(world, nndesc2, nndesc1)
    redistributor.redistribute(B_, B)

    if nodes == 'all':
        if rank != 0:
            # The receive buffer must match the dtype of the root's B;
            # a default float64 buffer is too small for complex input.
            B = np.zeros((N, N), dtype=A.dtype)
        world.broadcast(B, 0)
    return eps, B
def scal_diagonalize(A, nodes='master'):
    """Diagonalize the Hermitian matrix A (size N*N) with ScaLAPACK.

    Usage: eps, B = scal_diagonalize(A)

    Parameters:
        A: square Hermitian array; must be passed on every rank because
            the shape and symmetry checks run everywhere. Only the copy on
            rank 0 is fed into the redistribution.
        nodes: 'master' -- eigenvectors only available on master node;
            'all' -- eigenvectors broadcast to all nodes.

    Returns:
        ``(eps, B)``: the eigenvalues and eigenvectors.

    Raises:
        ValueError: ``nodes`` is neither 'master' nor 'all'.
    """
    if nodes not in ('master', 'all'):
        # Previously an unknown value fell off the end and returned None.
        raise ValueError("nodes must be 'master' or 'all', got %r" % (nodes,))

    # make sure A is N*N, and hermitian
    N = A.shape[0]
    assert A.shape[0] == A.shape[1]
    # Exact element-wise test, equivalent to comparing every (i, j) pair.
    assert (A == A.conj().T).all()

    # create blacs descriptor
    mb = 64
    g = BlacsGrid(world, 2, size // 2)
    nndesc1 = g.new_descriptor(N, N, N, N)
    nndesc2 = g.new_descriptor(N, N, mb, mb)

    # distribute A to blacs grid A_
    if rank != 0:
        A = nndesc1.zeros(dtype=A.dtype)
    A_ = nndesc2.empty(dtype=A.dtype)
    redistributor = Redistributor(world, nndesc1, nndesc2)
    redistributor.redistribute(A, A_)

    # diagonalize
    B_ = nndesc2.zeros(dtype=A.dtype)
    eps = np.zeros(N, dtype=A.dtype)
    nndesc2.diagonalize_dc(A_, B_, eps, 'L')

    # distribute the eigenvectors to master
    B = np.zeros_like(A)
    redistributor = Redistributor(world, nndesc2, nndesc1)
    redistributor.redistribute(B_, B)

    if nodes == 'all':
        if rank != 0:
            # The receive buffer must match the dtype of the root's B;
            # a default float64 buffer is too small for complex input.
            B = np.zeros((N, N), dtype=A.dtype)
        world.broadcast(B, 0)
    return eps, B
def run(psit_mG):
    """Orthonormalize ``psit_mG`` with respect to a test overlap operator.

    The overlap matrix is computed, ``inverse_cholesky`` is applied to it
    in place on world rank 0, and the broadcast matrix is applied to the
    wave functions. The recomputed overlap is asserted (on rank 0) to have
    a unit diagonal and zeros below it. Returns the new wave functions.
    """
    overlap = MatrixOperator(ksl, K)
    if 0:
        overlap.work1_xG = work1_xG
        overlap.work2_xG = work2_xG

    # S_nn = np.empty((N, N))

    def S(x):
        return x

    # Constant atomic corrections for the two atoms (indices 0 and 1).
    dS_aii = {0: np.ones((2, 2)) * 0.123, 1: np.ones((3, 3)) * 0.321}

    def dS(a, P_ni):
        return np.dot(P_ni, dS_aii[a])

    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    t1 = time()
    if world.rank == 0:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(S_nn.round(5))
        inverse_cholesky(S_nn)
    # Same array object on every rank; only the master's content matters
    # before the broadcast below.
    C_nn = S_nn
    t2 = time()

    if world.rank == 0:
        print('Cholesky Time %f' % (t2 - t1))

    # Distribute matrix:
    world.broadcast(C_nn, 0)

    psit_mG = overlap.matrix_multiply(C_nn, psit_mG, P_ani)

    if world.rank == 0:
        print('Made it past matrix multiply')

    # Check:
    S_nn = overlap.calculate_matrix_elements(psit_mG, P_ani, S, dS)

    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    if world.rank == 0:
        for n in range(N):
            assert abs(S_nn[n, n] - 1.0) < 1e-10
            assert not S_nn[n + 1:, n].round(10).any()

    return psit_mG
def run(psit_mG):
    """Rotate ``psit_mG`` into the eigenbasis of a test Hamiltonian.

    The Hamiltonian matrix is computed, diagonalized on world rank 0, and
    the broadcast eigenvector matrix is applied to the wave functions. The
    rotated Hamiltonian is then asserted (on rank 0) to carry the
    eigenvalues on its diagonal and zeros below it. Returns the rotated
    wave functions.
    """
    operator = MatrixOperator(ksl, J)

    def H(psit_xG):
        # Kinetic term written by kin(), then the local potential is added
        # band by band, in place.
        out_xG = np.empty_like(psit_xG)
        kin(psit_xG, out_xG)
        for src_G, dst_G in zip(psit_xG, out_xG):
            dst_G += vt_G * src_G
        return out_xG

    # Constant atomic corrections for the two atoms (indices 0 and 1).
    dH_aii = {0: np.ones((2, 2)) * 0.123, 1: np.ones((3, 3)) * 0.321}

    def dH(a, P_ni):
        return np.dot(P_ni, dH_aii[a])

    H_nn = operator.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    start = time()
    on_master = world.rank == 0
    if on_master:
        eps_n, vectors = np.linalg.eigh(H_nn)
        H_nn = np.ascontiguousarray(vectors.T)
    stop = time()

    if on_master:
        print('Diagonalization Time %f' % (stop - start))
        print(eps_n)

    # Distribute matrix:
    world.broadcast(H_nn, 0)

    psit_mG = operator.matrix_multiply(H_nn, psit_mG, P_ani)

    if on_master:
        print('Made it past matrix multiply')

    # Check:
    assert not (P_ani[0] - psit_mG[:, :2, 0, 0]).round(10).any()
    assert not (P_ani[1] - psit_mG[:, -1, -1, -3:]).round(10).any()

    H_nn = operator.calculate_matrix_elements(psit_mG, P_ani, H, dH)

    if on_master:
        for n in range(N):
            assert abs(H_nn[n, n] - eps_n[n]) < 1.5e-8
            assert not H_nn[n + 1:, n].round(8).any()

    return psit_mG
def par_load(self, filename, name):
    """Load a stored matrix on rank 0 and distribute it to all ranks.

    Parameters:
        filename: path of the ulm file, opened on world rank 0 only.
        name: ``'H_SS'`` stores the distributed matrix in ``self.H_sS``;
            ``'v_TS'`` additionally loads ``self.w_T`` and stores the
            transposed distribution in ``self.v_St``. For any other value
            only ``self.rhoG0_S`` and ``self.df_S`` are set.
    """
    import ase.io.ulm as ulm

    if world.rank == 0:
        r = ulm.open(filename, 'r')
        try:
            if name == 'v_TS':
                self.w_T = r.w_T
            self.rhoG0_S = r.rhoG0_S
            self.df_S = r.df_S
            A_XS = r.A_XS
        finally:
            # Close the reader even when one of the reads above fails.
            r.close()
    else:
        # Receive buffers for the broadcasts below.
        if name == 'v_TS':
            self.w_T = np.zeros((self.nS), dtype=float)
        self.rhoG0_S = np.zeros((self.nS), dtype=complex)
        self.df_S = np.zeros((self.nS), dtype=float)
        A_XS = None

    world.broadcast(self.rhoG0_S, 0)
    world.broadcast(self.df_S, 0)

    if name == 'H_SS':
        self.H_sS = self.distribute_A_SS(A_XS)

    if name == 'v_TS':
        world.broadcast(self.w_T, 0)
        self.v_St = self.distribute_A_SS(A_XS, transpose=True)
def test_overlaps_nonhermitian(self):
    """Verify matrix elements of a non-Hermitian (scaled) overlap operator.

    The overlap operator is multiplied by a random scalar ``alpha`` that is
    synchronized from world rank 0. The resulting matrix is transposed,
    replicated to all ranks and compared with ``alpha * self.S0_nn``.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    def S(x):
        return alpha * x

    def dS(a, P_ni):
        return np.dot(alpha * P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved word since Python 3.7, so ``self.async`` no
    # longer parses; getattr() reads the same attribute on any version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), False)
    # Transpose to get <psit_m|A|psit_n>
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS).T.copy()

    # Two-stage broadcast: over the domain communicator on band rank 0,
    # then over the band communicator on every rank.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_nn, 0)
    self.bd.comm.broadcast(S_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    self.check_and_plot(S_nn, alpha * self.S0_nn, 9, 'overlaps,nonhermitian')
def test_overlaps_nonhermitian(self):
    """Verify ``<psit_m|alpha*S|psit_n>`` against ``alpha * self.S0_nn``.

    ``alpha`` is a random scalar (with a random imaginary part when
    ``self.dtype`` is complex) broadcast with root 0 before use.
    """
    alpha = np.random.normal(size=1).astype(self.dtype)
    if self.dtype == complex:
        alpha += 1j * np.random.normal(size=1)
    world.broadcast(alpha, 0)

    # Set up non-Hermitian overlap operator:
    S = lambda x: alpha * x
    dS = lambda a, P_ni: np.dot(alpha * P_ni, self.setups[a].dO_ii)
    nblocks = self.get_optimal_number_of_blocks(self.blocking)

    # The attribute is literally named "async", which is a keyword from
    # Python 3.7 on; attribute syntax is a SyntaxError, so use getattr().
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'),
                             False)

    # transpose to get <psit_m|A|psit_n>
    S_nn = overlap.calculate_matrix_elements(
        self.psit_nG, self.P_ani, S, dS).T.copy()

    # Ranks with bd.comm.rank == 0 broadcast over gd.comm first, then every
    # rank takes part in the bd.comm broadcast.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(S_nn, 0)
    self.bd.comm.broadcast(S_nn, 0)

    if memstats:
        self.mem_test = record_memory()

    self.check_and_plot(S_nn, alpha * self.S0_nn, 9,
                        'overlaps,nonhermitian')
def test_multipole_expansion(self):
    """Compare 1/|r-r'| with its multipole expansion truncated at lmax = 9.

    Outer points have radii in [R+dR, 10R) and inner points in [0, R-dR),
    where dR is chosen so that ((R-dR)/(R+dR))**(lmax+1) equals ``tol``.
    The maximum absolute deviation must vanish to 9 decimal places.
    """
    lmax = 9
    R = 1.0
    npts = 1000
    tol = 1e-9

    # Solve ((R-dR)/(R+dR))**(lmax+1) = tol for dR
    ratio = tol**(1. / (lmax + 1))
    dR = R * (1 - ratio) / (1 + ratio)
    assert abs(((R - dR) / (R + dR))**(lmax + 1) - tol) < 1e-12

    def draw(low, high):
        # npts uniform samples, followed by a broadcast with root 0.
        x_g = np.random.uniform(low, high, size=npts)
        world.broadcast(x_g, 0)
        return x_g

    def to_cartesian(rad_g, polar_g, azimuth_g):
        # Spherical (r, theta, phi) -> Cartesian rows (x, y, z).
        xyz_vg = np.empty((3, npts), dtype=float)
        xyz_vg[0] = rad_g * np.cos(azimuth_g) * np.sin(polar_g)
        xyz_vg[1] = rad_g * np.sin(azimuth_g) * np.sin(polar_g)
        xyz_vg[2] = rad_g * np.cos(polar_g)
        return xyz_vg

    # Test multipole expansion of 1/|r-r'| in complex spherical harmonics
    # Draw order (r, theta, phi for outer, then inner) is kept fixed.
    r_g = draw(R + dR, 10 * R)
    theta_g = draw(0, np.pi)
    phi_g = draw(0, np.pi)
    r_vg = to_cartesian(r_g, theta_g, phi_g)

    rm_g = draw(0, R - dR)
    thetam_g = draw(0, np.pi)
    phim_g = draw(0, np.pi)
    rm_vg = to_cartesian(rm_g, thetam_g, phim_g)

    # Exact reference value.
    f0_g = np.sum((r_vg - rm_vg)**2, axis=0)**(-0.5)

    # Each rank accumulates the (l, m) terms handed out by lmiter; the
    # partial sums are then combined with world.sum().
    f_g = np.zeros_like(f0_g)
    for l, m in lmiter(lmax, comm=world):
        f_g += (4 * np.pi / (2*l + 1.) * r_g**(-1) * (rm_g/r_g)**l
                * Y(l, m, theta_g, phi_g) * Y(l, m, thetam_g, phim_g).conj())
    world.sum(f_g)

    e = np.abs(f_g - f0_g).max()
    self.assertAlmostEqual(e, 0, 9)
# ======================= R = 1.0 npts = 1000 tol = 1e-9 # ((R-dR)/(R+dR))**(lmax+1) = tol # (lmax+1)*np.log((R-dR)/(R+dR)) = np.log(tol) # (R-dR)/(R+dR) = np.exp(np.log(tol)/(lmax+1)) # R-dR = (R+dR) * tol**(1/(lmax+1)) # R * (1-tol**(1/(lmax+1))) = dR * (1+tol**(1/(lmax+1))) dR = R * (1 - tol**(1. / (lmax + 1))) / (1 + tol**(1. / (lmax + 1))) assert abs(((R - dR) / (R + dR))**(lmax + 1) - tol) < 1e-12 r_g = np.random.uniform(R + dR, 10 * R, size=npts) world.broadcast(r_g, 0) theta_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(theta_g, 0) phi_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(phi_g, 0) r_vg = np.empty((3, npts), dtype=float) r_vg[0] = r_g * np.cos(phi_g) * np.sin(theta_g) r_vg[1] = r_g * np.sin(phi_g) * np.sin(theta_g) r_vg[2] = r_g * np.cos(theta_g) rm_g = np.random.uniform(0, R - dR, size=npts) world.broadcast(rm_g, 0) thetam_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(thetam_g, 0) phim_g = np.random.uniform(0, np.pi, size=npts)
def test_multipole_expansion(self):
    """Test the truncated multipole expansion of 1/|r-r'| (lmax = 9).

    Two point sets are sampled: "outer" radii in [R+dR, 10R) and "inner"
    radii in [0, R-dR).  The expansion summed over all (l, m) from lmiter
    must agree with the direct value to 9 decimal places.
    """
    lmax, R, npts, tol = 9, 1.0, 1000, 1e-9

    # Solve ((R-dR)/(R+dR))**(lmax+1) = tol for dR
    t = tol**(1./(lmax+1))
    dR = R * (1 - t) / (1 + t)
    assert abs(((R-dR)/(R+dR))**(lmax+1) - tol) < 1e-12

    def sample_points(rmin, rmax):
        # Radius, polar and azimuthal angle are drawn in that order; each
        # array is broadcast with root 0 immediately after it is drawn.
        rad_g = np.random.uniform(rmin, rmax, size=npts)
        world.broadcast(rad_g, 0)
        pol_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(pol_g, 0)
        azi_g = np.random.uniform(0, np.pi, size=npts)
        world.broadcast(azi_g, 0)

        pos_vg = np.empty((3, npts), dtype=float)
        pos_vg[0] = rad_g*np.cos(azi_g)*np.sin(pol_g)
        pos_vg[1] = rad_g*np.sin(azi_g)*np.sin(pol_g)
        pos_vg[2] = rad_g*np.cos(pol_g)
        return rad_g, pol_g, azi_g, pos_vg

    # Test multipole expansion of 1/|r-r'| in complex spherical harmonics
    r_g, theta_g, phi_g, r_vg = sample_points(R+dR, 10*R)
    rm_g, thetam_g, phim_g, rm_vg = sample_points(0, R-dR)

    f0_g = np.sum((r_vg-rm_vg)**2, axis=0)**(-0.5)
    f_g = np.zeros_like(f0_g)

    # Local partial sum over this rank's (l, m) pairs, reduced afterwards.
    for l, m in lmiter(lmax, comm=world):
        f_g += (4 * np.pi / (2*l + 1.) * r_g**(-1) * (rm_g/r_g)**l
                * Y(l, m, theta_g, phi_g) * Y(l, m, thetam_g, phim_g).conj())
    world.sum(f_g)

    e = np.abs(f_g-f0_g).max()
    self.assertAlmostEqual(e, 0, 9)
# ======================= R = 1.0 npts = 1000 tol = 1e-9 # ((R-dR)/(R+dR))**(lmax+1) = tol # (lmax+1)*np.log((R-dR)/(R+dR)) = np.log(tol) # (R-dR)/(R+dR) = np.exp(np.log(tol)/(lmax+1)) # R-dR = (R+dR) * tol**(1/(lmax+1)) # R * (1-tol**(1/(lmax+1))) = dR * (1+tol**(1/(lmax+1))) dR = R * (1-tol**(1./(lmax+1))) / (1+tol**(1./(lmax+1))) assert abs(((R-dR)/(R+dR))**(lmax+1) - tol) < 1e-12 r_g = np.random.uniform(R+dR, 10*R, size=npts) world.broadcast(r_g, 0) theta_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(theta_g, 0) phi_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(phi_g, 0) r_vg = np.empty((3, npts), dtype=float) r_vg[0] = r_g*np.cos(phi_g)*np.sin(theta_g) r_vg[1] = r_g*np.sin(phi_g)*np.sin(theta_g) r_vg[2] = r_g*np.cos(theta_g) rm_g = np.random.uniform(0, R-dR, size=npts) world.broadcast(rm_g, 0) thetam_g = np.random.uniform(0, np.pi, size=npts) world.broadcast(thetam_g, 0) phim_g = np.random.uniform(0, np.pi, size=npts)
def reset_times(self, minwait=0.1, maxwait=0.2):
    """Refresh ``self.time_r`` with one uniform random value per rank.

    Values are drawn from [minwait, maxwait) with ``world.size`` entries and
    then broadcast with root 0.  When ``world.size`` is 1 the method returns
    without touching ``self.time_r``.
    """
    if world.size != 1:
        nranks = world.size
        self.time_r = np.random.uniform(minwait, maxwait, size=nranks)
        world.broadcast(self.time_r, 0)
def expand_ibz(lU_scc, cU_scc, ibzk_kc, pbc_c=np.ones(3, bool)):
    """Expand IBZ from lattice group to crystal group.

    Parameters
    ----------
    lU_scc : ndarray
        Lattice symmetry operators.
    cU_scc : ndarray
        Crystal symmetry operators.
    ibzk_kc : ndarray
        Vertices of lattice IBZ.
    pbc_c : ndarray of bool
        Axis mask.  If any entry is False, each candidate hull volume is
        divided by the extent of the unfolded points along the False axes
        (presumably the non-periodic directions -- confirm with callers).
        The default array is created once at definition time; it is only
        read here, never modified.

    Returns
    -------
    ibzk_kc : ndarray
        Vertices of crystal IBZ.

    """

    # Find right cosets. The lattice group is partitioned into right cosets
    # of the crystal group. This can in practice be done by testing whether
    # U1 U2^{-1} is in the crystal group as done below.
    cosets = []
    Utmp_scc = lU_scc.copy()
    while len(Utmp_scc):
        # Take the first remaining operator as the coset representative.
        U1_cc = Utmp_scc[0].copy()
        Utmp_scc = np.delete(Utmp_scc, 0, axis=0)
        j = 0
        new_coset = [U1_cc]
        while j < len(Utmp_scc):
            U2_cc = Utmp_scc[j]
            U3_cc = np.dot(U1_cc, np.linalg.inv(U2_cc))
            # Exact element-wise comparison against every crystal operator.
            if (U3_cc == cU_scc).all(2).all(1).any():
                new_coset.append(U2_cc)
                Utmp_scc = np.delete(Utmp_scc, j, axis=0)
                # Entry j was removed; step back so that the increment
                # below re-examines the element that moved into slot j.
                j -= 1
            j += 1
        cosets.append(new_coset)

    volume = np.inf
    nibzk_kc = ibzk_kc
    U0_cc = cosets[0][0]  # Origin

    if np.any(~pbc_c):
        nonpbcind = np.argwhere(~pbc_c)

    # Once the cosets are known the irreducible zone is given by picking one
    # operation from each coset. To make sure that the IBZ produced is simply
    # connected we compute the volume of the convex hull of the produced IBZ
    # and pick (one of) the ones that have the smallest volume. This is done
    # by brute force and can sometimes take a while, however, in most cases
    # this is not a problem.
    # The combinations are strided over the ranks of `world`.
    combs = list(product(*cosets[1:]))[world.rank::world.size]
    for U_scc in combs:
        if not len(U_scc):
            continue
        U_scc = np.concatenate([np.array(U_scc), [U0_cc]])
        tmpk_kc = unfold_points(ibzk_kc, U_scc)
        volumenew = convex_hull_volume(tmpk_kc)

        if np.any(~pbc_c):
            # Compute the area instead
            volumenew /= (tmpk_kc[:, nonpbcind].max() -
                          tmpk_kc[:, nonpbcind].min())

        if volumenew < volume:
            nibzk_kc = tmpk_kc
            volume = volumenew

    ibzk_kc = unique_rows(nibzk_kc)
    volume = np.array((volume, ))

    # Gather each rank's best volume and find the rank with the minimum.
    volumes = np.zeros(world.size, float)
    world.all_gather(volume, volumes)

    minrank = np.argmin(volumes)
    # The winning rank's array shape is broadcast first so that the other
    # ranks can allocate a matching receive buffer.
    minshape = np.array(ibzk_kc.shape)
    world.broadcast(minshape, minrank)

    if world.rank != minrank:
        ibzk_kc = np.zeros(minshape, float)
    world.broadcast(ibzk_kc, minrank)

    return ibzk_kc
# Who has global index 11? The master needs it! i = 11 rank, ilocal = divmod(i, M) mpi_debug('rank=%d, ilocal=%d, i=%d' % (rank,ilocal,i)) assert rank*M + ilocal == i # Do I have it? if world.rank == rank: # Yes, so extract data (must be an array) idata = np.array([data[ilocal]], dtype=data.dtype) else: # No, so just allocate space idata = np.empty(1, dtype=data.dtype) # Broadcast from owner to everyone else world.broadcast(idata, rank) """ # This does the same as broadcast with send/receive... # Do I have it? if world.rank == rank: # Yes, now send it to the others for other_rank in range(world.size): # We don't have to send it to ourselves if other_rank != rank: world.send(idata, other_rank, tag=123) else: # No, so receive from the one that own the data world.receive(idata, rank, tag=123) """
def get_vchi(self, w_w=None, eta=0.1, q_c=[0.0, 0.0, 0.0], direction=0,
             ac=1.0, readfile=None, optical=True, write_eig=None):
    r"""Returns v * \chi where v is the bare Coulomb interaction

    Parameters
    ----------
    w_w : ndarray
        Frequencies; divided by ``Hartree`` before use.  Although the
        default is None, ``len(w_w)`` is evaluated unconditionally, so an
        array must be supplied.
    eta : float
        Broadening; divided by ``Hartree`` and used as the imaginary part of
        the denominator ``w - w_T + 1j * eta``.
    q_c, direction, ac, readfile, optical, write_eig
        Forwarded unchanged to ``self.get_bse_matrix``.  ``q_c`` is also used
        for the 1/|q|^2 scaling when ``self.q_c`` is not zero, ``ac``
        multiplies the returned array, and ``write_eig`` (if not None) is
        the path of a text file written by world rank 0.

    Returns
    -------
    ndarray of complex
        ``vchi_w * ac``, one value per entry of ``w_w``.
    """
    # NOTE(review): presumably this call populates self.w_T, self.rhoG0_S,
    # self.df_S and self.v_ST / self.v_St read below -- confirm.
    self.get_bse_matrix(q_c=q_c, direction=direction, ac=ac,
                        readfile=readfile, optical=optical,
                        write_eig=write_eig)

    w_T = self.w_T
    rhoG0_S = self.rhoG0_S
    df_S = self.df_S

    print('Calculating response function at %s frequency points' %
          len(w_w), file=self.fd)
    vchi_w = np.zeros(len(w_w), dtype=complex)

    if not self.td:
        # Weights are computed on rank 0 only and broadcast to the others,
        # which receive into the pre-allocated buffer.
        C_T = np.zeros(self.nS - len(self.excludef_S), complex)
        if world.rank == 0:
            A_T = np.dot(rhoG0_S, self.v_ST)
            B_T = np.dot(rhoG0_S * df_S, self.v_ST)
            tmp = np.dot(self.v_ST.conj().T, self.v_ST)
            overlap_tt = np.linalg.inv(tmp)
            C_T = np.dot(B_T.conj(), overlap_tt.T) * A_T
        world.broadcast(C_T, 0)
    else:
        A_t = np.dot(rhoG0_S, self.v_St)
        B_t = np.dot(rhoG0_S * df_S, self.v_St)
        if world.size == 1:
            C_T = B_t.conj() * A_t
        else:
            Nv = self.nv * (self.spinors + 1)
            Nc = self.nc * (self.spinors + 1)
            Ns = self.spins
            nS = self.nS
            # Ceiling division of the k-point count over the ranks.
            ns = -(-self.kd.nbzkpts // world.size) * Nv * Nc * Ns
            grid = BlacsGrid(world, world.size, 1)
            desc = grid.new_descriptor(nS, 1, ns, 1)
            C_t = desc.empty(dtype=complex)
            C_t[:, 0] = B_t.conj() * A_t
            C_T = desc.collect_on_master(C_t)[:, 0]
            if world.rank != 0:
                # Non-master ranks need a full-size receive buffer.
                C_T = np.empty(nS, dtype=complex)
            world.broadcast(C_T, 0)

    eta /= Hartree
    for iw, w in enumerate(w_w / Hartree):
        tmp_T = 1. / (w - w_T + 1j * eta)
        vchi_w[iw] += np.dot(tmp_T, C_T)
    vchi_w *= 4 * np.pi / self.vol

    if not np.allclose(self.q_c, 0.0):
        cell_cv = self.calc.wfs.gd.cell_cv
        B_cv = 2 * np.pi * np.linalg.inv(cell_cv).T
        q_v = np.dot(q_c, B_cv)
        vchi_w /= np.dot(q_v, q_v)

    # Check f-sum rule (trapezoidal integration of w * Im(vchi) over w_w).
    nv = self.calc.wfs.setups.nvalence
    dw_w = (w_w[1:] - w_w[:-1]) / Hartree
    wchi_w = (w_w[1:] * vchi_w[1:] + w_w[:-1] * vchi_w[:-1]) / Hartree / 2
    N = -np.dot(dw_w, wchi_w.imag) * self.vol / (2 * np.pi**2)
    print(file=self.fd)
    print('Checking f-sum rule:', file=self.fd)
    print(' Valence = %s, N = %f' % (nv, N), file=self.fd)
    print(file=self.fd)

    if write_eig is not None:
        if world.rank == 0:
            # Context manager closes the file even if formatting fails.
            with open(write_eig, 'w') as f:
                print('# %s eigenvalues in eV' % self.mode, file=f)
                for iw, w in enumerate(self.w_T * Hartree):
                    print('%8d %12.6f %12.16f' %
                          (iw, w.real, C_T[iw].real), file=f)

    return vchi_w * ac