def par_save(self, filename, name, A_sS):
    """Write the row-distributed matrix A_sS to filename, gathering the
    blocks on rank 0."""
    from gpaw.io import open

    nS_local = self.nS_local
    nS = self.nS

    if rank == 0:
        w = open(filename, 'w', world)
        w.dimension('nS', nS)

        if name == 'v_SS':
            w.add('w_S', ('nS',), dtype=self.w_S.dtype)
            w.fill(self.w_S)
        w.add('rhoG0_S', ('nS',), dtype=complex)
        w.fill(self.rhoG0_S)
        w.add(name, ('nS', 'nS'), dtype=complex)
        tmp = np.zeros_like(A_sS)

    # Assumes that H_SS is written in order from rank 0 to rank N
    for irank in range(size):
        if irank == 0:
            if rank == 0:
                w.fill(A_sS)
        else:
            if rank == irank:
                world.send(A_sS, 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)

    if rank == 0:
        w.close()
    world.barrier()
def collect_orbitals(a_xo, coords, root=0):
    """Collect array distributed over orbitals to root-CPU.

    Input matrix has last axis distributed amongst CPUs;
    return is None on slaves, and the collected array on root.

    The distribution can be uneven amongst CPUs.  The list coords gives the
    number of values for each CPU.
    """
    a_xo = np.ascontiguousarray(a_xo)
    if world.size == 1:
        return a_xo

    # All slaves send their piece to ``root``:
    # There can be several sends before the corresponding receives
    # are posted, so use synchronous send here.
    if world.rank != root:
        world.ssend(a_xo, root, 112)
        return None

    # On root, put the subdomains from the slaves into the big array
    # for the whole domain on root:
    xshape = a_xo.shape[:-1]
    Norb2 = sum(coords)  # total number of orbital indices
    a_xO = np.empty(xshape + (Norb2,), a_xo.dtype)
    o = 0
    for rank, norb in enumerate(coords):
        if rank != root:
            tmp_xo = np.empty(xshape + (norb,), a_xo.dtype)
            world.receive(tmp_xo, rank, 112)
            a_xO[..., o:o + norb] = tmp_xo
        else:
            a_xO[..., o:o + norb] = a_xo
        o += norb
    return a_xO
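# Minimal usage sketch for collect_orbitals (illustrative only, not part of
# the original source): run under MPI with gpaw.mpi.world; each rank owns a
# different number of orbital columns, listed in ``coords``.  Variable names
# below are hypothetical.
from gpaw.mpi import world
import numpy as np

coords = [2 + r for r in range(world.size)]            # uneven distribution
a_xo = world.rank * np.ones((3, coords[world.rank]))   # this rank's columns
a_xO = collect_orbitals(a_xo, coords, root=0)          # full array on root
if world.rank == 0:
    assert a_xO.shape == (3, sum(coords))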
def par_write(filename, name, comm, chi0_wGG):
    """Write the frequency-distributed array chi0_wGG to filename;
    rank 0 collects and writes one frequency slice at a time."""
    ## support only world communicator at the moment
    from gpaw.mpi import rank, size, world
    from gpaw.io import open

    assert comm.size == size
    assert comm.rank == rank

    Nw_local, npw, npw1 = chi0_wGG.shape
    assert npw == npw1
    Nw = Nw_local * size

    w = open(filename, 'w', comm)
    w.dimension('Nw', Nw)
    w.dimension('npw', npw)
    w.add(name, ('Nw', 'npw', 'npw'), dtype=complex)

    if rank == 0:
        tmp = np.zeros_like(chi0_wGG[0])

    for iw in range(Nw):
        irank = iw // Nw_local
        if irank == 0:
            if rank == 0:
                w.fill(chi0_wGG[iw])
        else:
            if rank == irank:
                world.send(chi0_wGG[iw - rank * Nw_local], 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)

    if rank == 0:
        w.close()
    world.barrier()
def par_save(self, filename, name, A_sS):
    """Write the row-distributed matrix A_sS to filename, gathering the
    blocks on rank 0."""
    from gpaw.io import open

    nS = self.nS

    if rank == 0:
        w = open(filename, 'w', world)
        w.dimension('nS', nS)

        if name == 'v_SS':
            w.add('w_S', ('nS',), dtype=self.w_S.dtype)
            w.fill(self.w_S)
        w.add('rhoG0_S', ('nS',), dtype=complex)
        w.fill(self.rhoG0_S)
        w.add(name, ('nS', 'nS'), dtype=complex)
        tmp = np.zeros_like(A_sS)

    # Assumes that H_SS is written in order from rank 0 to rank N
    for irank in range(size):
        if irank == 0:
            if rank == 0:
                w.fill(A_sS)
        else:
            if rank == irank:
                world.send(A_sS, 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)

    if rank == 0:
        w.close()
    world.barrier()
def par_write(filename, name, comm, chi0_wGG):
    """Write the frequency-distributed array chi0_wGG to filename;
    rank 0 collects and writes one frequency slice at a time."""
    ## support only world communicator at the moment
    from gpaw.mpi import rank, size, world
    from gpaw.io import open

    assert comm.size == size
    assert comm.rank == rank

    Nw_local, npw, npw1 = chi0_wGG.shape
    assert npw == npw1
    Nw = Nw_local * size

    if rank == 0:
        w = open(filename, 'w', comm)
        w.dimension('Nw', Nw)
        w.dimension('npw', npw)
        w.add(name, ('Nw', 'npw', 'npw'), dtype=complex)
        tmp = np.zeros_like(chi0_wGG[0])

    for iw in range(Nw):
        irank = iw // Nw_local
        if irank == 0:
            if rank == 0:
                w.fill(chi0_wGG[iw])
        else:
            if rank == irank:
                world.send(chi0_wGG[iw - rank * Nw_local], 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)

    if rank == 0:
        w.close()
    world.barrier()
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if self.calc.wfs.kpt_comm.size != world.size or world.size == 1:
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)

            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)

            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)

            return np.complex128(psit_G)
    else:
        if self.nkpt % size != 0:
            raise ValueError('The number of k-points must be divisible by '
                             'the number of CPUs in no-wfs-dumping mode!')

        # support ground state calculation with only kpoint parallelization
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = rank

        klist = np.array([kpt_rank, u, bzkpt_rank, n])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)

        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)

        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)

        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)

        self.wScomm.broadcast(psit_G, 0)

        return psit_G
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if (self.calc.wfs.world.size == 1 or
        self.calc.wfs.gd.comm.size != 1 or
        self.calc.input_parameters['mode'] == 'lcao'):
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)

            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)

            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)

            return np.complex128(psit_G)
    else:
        # support ground state calculation with kpoint and band
        # parallelization, but domain decomposition must = 1
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = self.kcomm.rank
        band_rank, myn = self.calc.wfs.bd.who_has(n)
        assert self.calc.wfs.gd.comm.size == 1
        world_rank = (kpt_rank * self.calc.wfs.band_comm.size + band_rank)

        # in the following, kpt_rank is assigned to world_rank
        klist = np.array([world_rank, u, bzkpt_rank, myn])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)

        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)

        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)

        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)

        self.wScomm.broadcast(psit_G, 0)

        return psit_G
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if (self.calc.wfs.world.size == 1 or
        self.calc.wfs.gd.comm.size != 1 or
        self.calc.input_parameters['mode'] == 'lcao'):
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)

            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)

            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)

            return np.complex128(psit_G)
    else:
        # support ground state calculation with kpoint and band
        # parallelization, but domain decomposition must = 1
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = self.kcomm.rank
        band_rank, myn = self.calc.wfs.bd.who_has(n)
        assert self.calc.wfs.gd.comm.size == 1
        world_rank = (kpt_rank * self.calc.wfs.band_comm.size + band_rank)

        # in the following, kpt_rank is assigned to world_rank
        klist = np.array([world_rank, u, bzkpt_rank, myn])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)

        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)

        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)

        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)

        self.wScomm.broadcast(psit_G, 0)

        return psit_G
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if self.calc.wfs.kpt_comm.size != world.size or world.size == 1:
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)

            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)

            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)

            return np.complex128(psit_G)
    else:
        if self.nkpt % size != 0:
            raise ValueError('The number of k-points must be divisible by '
                             'the number of CPUs in no-wfs-dumping mode!')

        # support ground state calculation with only kpoint parallelization
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = rank

        klist = np.array([kpt_rank, u, bzkpt_rank, n])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)

        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)

        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)

        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)

        self.wScomm.broadcast(psit_G, 0)

        return psit_G
def update_references(self, kpt_u, rank_a):
    requests = []
    kpt_comm, band_comm, domain_comm = (self.kd_old.comm, self.bd.comm,
                                        self.gd.comm)

    for u in range(self.kd_old.nks):
        kpt_rank, myu = self.kd_old.who_has(u)
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            for a in range(self.natoms):
                domain_rank = rank_a[a]
                if (kpt_comm.rank == kpt_rank and
                    band_comm.rank == band_rank and
                    domain_comm.rank == domain_rank):
                    kpt = kpt_u[myu]
                    chk = md5_array(kpt.P_ani[a][myn], numeric=True)
                    if world.rank == 0:
                        self.chk_una[u, n, a] = chk
                    else:
                        requests.append(
                            world.send(np.array([chk], dtype=np.int64),
                                       0, 1303 + a, block=False))
                elif world.rank == 0:
                    world_rank = (rank_a[a] +
                                  band_rank * domain_comm.size +
                                  kpt_rank * domain_comm.size *
                                  band_comm.size)
                    chk = self.chk_una[u, n, a:a + 1]  # XXX hack to get pointer
                    requests.append(
                        world.receive(chk, world_rank, 1303 + a, block=False))

    world.waitall(requests)
    world.broadcast(self.chk_una, 0)
def collect_A_SS(self, A_sS):
    """Collect the row-distributed matrix A_sS into the full (nS, nS)
    array on rank 0; other ranks return None."""
    if world.rank == 0:
        A_SS = np.zeros((self.nS, self.nS), dtype=complex)
        A_SS[:len(A_sS)] = A_sS
        Ntot = len(A_sS)
        for rank in range(1, world.size):
            nkr, nk, ns = self.parallelisation_sizes(rank)
            buf = np.empty((ns, self.nS), dtype=complex)
            world.receive(buf, rank, tag=123)
            A_SS[Ntot:Ntot + ns] = buf
            Ntot += ns
    else:
        world.send(A_sS, 0, tag=123)
    world.barrier()
    if world.rank == 0:
        return A_SS
def update_references(self, kpt_u, rank_a):
    requests = []
    kpt_comm, band_comm, domain_comm = (self.kd_old.comm, self.bd.comm,
                                        self.gd.comm)

    for u in range(self.kd_old.nks):
        kpt_rank, myu = self.kd_old.who_has(u)
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            for a in range(self.natoms):
                domain_rank = rank_a[a]
                if (kpt_comm.rank == kpt_rank and
                    band_comm.rank == band_rank and
                    domain_comm.rank == domain_rank):
                    kpt = kpt_u[myu]
                    chk = md5_array(kpt.P_ani[a][myn], numeric=True)
                    if world.rank == 0:
                        self.chk_una[u, n, a] = chk
                    else:
                        requests.append(
                            world.send(np.array([chk], dtype=np.int64),
                                       0, 1303 + a, block=False))
                elif world.rank == 0:
                    world_rank = (rank_a[a] +
                                  band_rank * domain_comm.size +
                                  kpt_rank * domain_comm.size *
                                  band_comm.size)
                    chk = self.chk_una[u, n, a:a + 1]  # XXX hack to get pointer
                    requests.append(
                        world.receive(chk, world_rank, 1303 + a, block=False))

    world.waitall(requests)
    world.broadcast(self.chk_una, 0)
def gatherv(m, N=None):
    """Gather a 1D or 2D array distributed along its first axis and
    broadcast the assembled array to all ranks."""
    from gpaw.mpi import world, size, rank

    if world.size == 1:
        return m

    ndim = m.ndim

    if ndim == 2:
        n, N = m.shape
        assert n < N
        M = np.zeros((N, N), dtype=complex)
    elif ndim == 1:
        n = m.shape[0]
        M = np.zeros(N, dtype=complex)
    else:
        raise NotImplementedError('Only 1D and 2D arrays are supported.')

    # Gather the number of local rows held by every rank.
    n_index = np.zeros(size, dtype=int)
    world.all_gather(np.array([n]), n_index)

    root = 0
    if rank != root:
        world.ssend(m, root, 112 + rank)
    else:
        for irank, n in enumerate(n_index):
            if irank == root:
                if ndim == 2:
                    M[:n_index[0], :] = m
                else:
                    M[:n_index[0]] = m
            else:
                n_start = n_index[0:irank].sum()
                n_end = n_index[0:irank + 1].sum()
                if ndim == 2:
                    tmp_nN = np.zeros((n, N), dtype=complex)
                    world.receive(tmp_nN, irank, 112 + irank)
                    M[n_start:n_end, :] = tmp_nN
                else:
                    tmp_n = np.zeros(n, dtype=complex)
                    world.receive(tmp_n, irank, 112 + irank)
                    M[n_start:n_end] = tmp_n

    world.broadcast(M, root)

    return M
def gatherv(m, N=None):
    """Gather a 1D or 2D array distributed along its first axis and
    broadcast the assembled array to all ranks."""
    if world.size == 1:
        return m

    ndim = m.ndim

    if ndim == 2:
        n, N = m.shape
        assert n < N
        M = np.zeros((N, N), dtype=complex)
    elif ndim == 1:
        n = m.shape[0]
        M = np.zeros(N, dtype=complex)
    else:
        raise NotImplementedError('Only 1D and 2D arrays are supported.')

    # Gather the number of local rows held by every rank.
    n_index = np.zeros(size, dtype=int)
    world.all_gather(np.array([n]), n_index)

    root = 0
    if rank != root:
        world.ssend(m, root, 112 + rank)
    else:
        for irank, n in enumerate(n_index):
            if irank == root:
                if ndim == 2:
                    M[:n_index[0], :] = m
                else:
                    M[:n_index[0]] = m
            else:
                n_start = n_index[0:irank].sum()
                n_end = n_index[0:irank + 1].sum()
                if ndim == 2:
                    tmp_nN = np.zeros((n, N), dtype=complex)
                    world.receive(tmp_nN, irank, 112 + irank)
                    M[n_start:n_end, :] = tmp_nN
                else:
                    tmp_n = np.zeros(n, dtype=complex)
                    world.receive(tmp_n, irank, 112 + irank)
                    M[n_start:n_end] = tmp_n

    world.broadcast(M, root)

    return M
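# Hedged usage sketch for gatherv (illustrative only, not part of the
# original source): every rank holds a contiguous block of rows of an
# N x N complex matrix; gatherv assembles the full matrix and broadcasts
# it to all ranks.  Assumes the module-level ``world``, ``size`` and
# ``rank`` used by gatherv come from gpaw.mpi.
import numpy as np
from gpaw.mpi import world

N = 4 * world.size + 1                       # guarantees n < N on every rank
rows = [N // world.size] * world.size
rows[-1] += N % world.size
start = sum(rows[:world.rank])
m = (np.arange(start, start + rows[world.rank], dtype=complex)[:, np.newaxis]
     * np.ones(N))
M = gatherv(m)                               # full (N, N) matrix everywhere
assert M.shape == (N, N)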
def distribute_A_SS(self, A_SS, transpose=False):
    """Distribute the full matrix A_SS from rank 0 as row blocks A_sS
    to all ranks."""
    if world.rank == 0:
        for rank in range(0, world.size):
            nkr, nk, ns = self.parallelisation_sizes(rank)
            if rank == 0:
                A_sS = A_SS[0:ns]
                Ntot = ns
            else:
                world.send(A_SS[Ntot:Ntot + ns], rank, tag=123)
                Ntot += ns
    else:
        nkr, nk, ns = self.parallelisation_sizes()
        A_sS = np.empty((ns, self.nS), dtype=complex)
        world.receive(A_sS, 0, tag=123)
    world.barrier()
    if transpose:
        A_sS = A_sS.T
    return A_sS
def get_phi_qaGp(self):
    N1_max = 0
    N2_max = 0
    natoms = len(self.calc.wfs.setups)
    for id in range(natoms):
        N1 = self.npw
        N2 = self.calc.wfs.setups[id].ni**2
        if N1 > N1_max:
            N1_max = N1
        if N2 > N2_max:
            N2_max = N2

    nbzq = self.kd.nbzkpts
    nbzq, nq_local, q_start, q_end = parallel_partition(
        nbzq, world.rank, world.size, reshape=False)
    phimax_qaGp = np.zeros((nq_local, natoms, N1_max, N2_max), dtype=complex)
    # phimax_qaGp = np.zeros((nbzq, natoms, N1_max, N2_max), dtype=complex)

    t0 = time()
    for iq in range(nq_local):
        q_c = self.bzq_qc[iq + q_start]
        tmp_aGp = self.get_phi_aGp(q_c, parallel=False)
        for id in range(natoms):
            N1, N2 = tmp_aGp[id].shape
            phimax_qaGp[iq, id, :N1, :N2] = tmp_aGp[id]
        self.timing(iq * world.size, t0, nq_local, 'iq')
    world.barrier()

    # Write to disk
    filename = 'phi_qaGp'
    if world.rank == 0:
        w = Writer(filename)
        w.dimension('nbzq', nbzq)
        w.dimension('natoms', natoms)
        w.dimension('nG', N1_max)
        w.dimension('nii', N2_max)
        w.add('phi_qaGp', ('nbzq', 'natoms', 'nG', 'nii'), dtype=complex)

    for q in range(nbzq):
        residual = nbzq % size
        N_local = nbzq // size
        if q < residual * (N_local + 1):
            qrank = q // (N_local + 1)
        else:
            qrank = (q - residual * (N_local + 1)) // N_local + residual

        if qrank == 0:
            if world.rank == 0:
                phi_aGp = phimax_qaGp[q - q_start]
        else:
            if world.rank == qrank:
                phi_aGp = phimax_qaGp[q - q_start]
                world.send(phi_aGp, 0, q)
            elif world.rank == 0:
                # Rank 0 reuses phi_aGp (set in the qrank == 0 case above)
                # as the receive buffer; all slices have the same shape.
                world.receive(phi_aGp, qrank, q)

        if world.rank == 0:
            w.fill(phi_aGp)

    if world.rank == 0:
        w.close()
    world.barrier()
def get_phi_qaGp(self):
    N1_max = 0
    N2_max = 0
    natoms = len(self.calc.wfs.setups)
    for id in range(natoms):
        N1 = self.npw
        N2 = self.calc.wfs.setups[id].ni**2
        if N1 > N1_max:
            N1_max = N1
        if N2 > N2_max:
            N2_max = N2

    nbzq = self.kd.nbzkpts
    nbzq, nq_local, q_start, q_end = parallel_partition(
        nbzq, world.rank, world.size, reshape=False)
    phimax_qaGp = np.zeros((nq_local, natoms, N1_max, N2_max), dtype=complex)
    # phimax_qaGp = np.zeros((nbzq, natoms, N1_max, N2_max), dtype=complex)

    t0 = time()
    for iq in range(nq_local):
        q_c = self.bzq_qc[iq + q_start]
        tmp_aGp = self.get_phi_aGp(q_c, parallel=False)
        for id in range(natoms):
            N1, N2 = tmp_aGp[id].shape
            phimax_qaGp[iq, id, :N1, :N2] = tmp_aGp[id]
        self.timing(iq * world.size, t0, nq_local, 'iq')
    world.barrier()

    # Write to disk
    filename = 'phi_qaGp'
    if world.rank == 0:
        w = Writer(filename)
        w.dimension('nbzq', nbzq)
        w.dimension('natoms', natoms)
        w.dimension('nG', N1_max)
        w.dimension('nii', N2_max)
        w.add('phi_qaGp', ('nbzq', 'natoms', 'nG', 'nii'), dtype=complex)

    for q in range(nbzq):
        residual = nbzq % size
        N_local = nbzq // size
        if q < residual * (N_local + 1):
            qrank = q // (N_local + 1)
        else:
            qrank = (q - residual * (N_local + 1)) // N_local + residual

        if qrank == 0:
            if world.rank == 0:
                phi_aGp = phimax_qaGp[q - q_start]
        else:
            if world.rank == qrank:
                phi_aGp = phimax_qaGp[q - q_start]
                world.send(phi_aGp, 0, q)
            elif world.rank == 0:
                # Rank 0 reuses phi_aGp (set in the qrank == 0 case above)
                # as the receive buffer; all slices have the same shape.
                world.receive(phi_aGp, qrank, q)

        if world.rank == 0:
            w.fill(phi_aGp)

    world.barrier()
    if world.rank == 0:
        w.close()
    return
my_potential_rank = world.rank - first_potential_rank
my_client_rank = client_ranks[my_potential_rank / potential_group_size]
print "pot: rank: %i my_client_rank: %i" % (world.rank, my_client_rank)

for i in xrange(clients):
    s = potential_group_size
    new_comm = world.new_communicator(potential_ranks[i * s:i * s + s])
    if new_comm != None:
        my_comm = new_comm

first_time = True
while True:
    natoms = numpy.array((0,), 'i')
    if my_comm.rank == 0:
        world.receive(natoms, my_client_rank, tag=0)
    my_comm.broadcast(natoms, 0)

    atomic_numbers = numpy.zeros(natoms, 'i')
    positions = numpy.zeros(3 * natoms, 'd')
    cell = numpy.zeros(9, 'd')
    pbc = numpy.array((0,), 'i')
    if my_comm.rank == 0:
        world.receive(atomic_numbers, my_client_rank, tag=0)
        world.receive(positions, my_client_rank, tag=0)
        world.receive(cell, my_client_rank, tag=0)
        world.receive(pbc, my_client_rank, tag=0)
    my_comm.broadcast(atomic_numbers, 0)
    my_comm.broadcast(positions, 0)
    my_comm.broadcast(cell, 0)
    my_comm.broadcast(pbc, 0)
#print "pot: rank: %i my_client_rank: %i" % (world.rank, my_client_rank) for i in xrange(clients): s = potential_group_size new_comm = world.new_communicator(numpy.array(potential_ranks[i*s:i*s+s], dtype='i')) if new_comm != None: my_comm = new_comm first_time = True nforce_calls = 0 performance_log = "" while True: nforce_calls += 1 natoms = numpy.array((0,), 'i') if my_comm.rank == 0: world.receive(natoms, my_client_rank, tag=0) my_comm.broadcast(natoms, 0) atomic_numbers = numpy.zeros(natoms, 'i') positions = numpy.zeros(3*natoms, 'd') cell = numpy.zeros(9, 'd') pbc = numpy.array((0,), 'i') logdir = numpy.zeros(1024, 'l') if my_comm.rank == 0: world.receive(atomic_numbers, my_client_rank, tag=0) world.receive(positions, my_client_rank, tag=0) world.receive(cell, my_client_rank, tag=0) world.receive(pbc, my_client_rank, tag=0) world.receive(logdir, my_client_rank, tag=0) my_comm.broadcast(atomic_numbers, 0) my_comm.broadcast(positions, 0)