def par_write(filename, name, comm, chi0_wGG):
    # Supports only the world communicator at the moment.
    import numpy as np
    from gpaw.mpi import rank, size, world
    from gpaw.io import open
    assert comm.size == size
    assert comm.rank == rank

    Nw_local, npw, npw1 = chi0_wGG.shape
    assert npw == npw1
    Nw = Nw_local * size

    if rank == 0:
        w = open(filename, 'w', comm)
        w.dimension('Nw', Nw)
        w.dimension('npw', npw)
        w.add(name, ('Nw', 'npw', 'npw'), dtype=complex)
        tmp = np.zeros_like(chi0_wGG[0])

    # Frequencies are distributed in blocks of Nw_local: frequency iw lives
    # on rank iw // Nw_local.  Rank 0 writes its own slices directly and
    # receives everyone else's, one frequency at a time.
    for iw in range(Nw):
        irank = iw // Nw_local
        if irank == 0:
            if rank == 0:
                w.fill(chi0_wGG[iw])
        else:
            if rank == irank:
                world.send(chi0_wGG[iw - rank * Nw_local], 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)
    if rank == 0:
        w.close()
    world.barrier()
def par_save(self, filename, name, A_sS):
    from gpaw.io import open
    nS_local = self.nS_local
    nS = self.nS

    if rank == 0:
        w = open(filename, 'w', world)
        w.dimension('nS', nS)
        if name == 'v_SS':
            w.add('w_S', ('nS',), dtype=self.w_S.dtype)
            w.fill(self.w_S)
        w.add('rhoG0_S', ('nS',), dtype=complex)
        w.fill(self.rhoG0_S)
        w.add(name, ('nS', 'nS'), dtype=complex)
        tmp = np.zeros_like(A_sS)

    # Assumes that H_SS is written in order from rank 0 to rank N.
    for irank in range(size):
        if irank == 0:
            if rank == 0:
                w.fill(A_sS)
        else:
            if rank == irank:
                world.send(A_sS, 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)
    if rank == 0:
        w.close()
    world.barrier()
def par_write(filename, name, comm, chi0_wGG):
    # Supports only the world communicator at the moment.
    import numpy as np
    from gpaw.mpi import rank, size, world
    from gpaw.io import open
    assert comm.size == size
    assert comm.rank == rank

    Nw_local, npw, npw1 = chi0_wGG.shape
    assert npw == npw1
    Nw = Nw_local * size

    # The writer is opened on all ranks here; only rank 0 fills and closes it.
    w = open(filename, 'w', comm)
    w.dimension('Nw', Nw)
    w.dimension('npw', npw)
    w.add(name, ('Nw', 'npw', 'npw'), dtype=complex)
    if rank == 0:
        tmp = np.zeros_like(chi0_wGG[0])

    for iw in range(Nw):
        irank = iw // Nw_local
        if irank == 0:
            if rank == 0:
                w.fill(chi0_wGG[iw])
        else:
            if rank == irank:
                world.send(chi0_wGG[iw - rank * Nw_local], 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)
    if rank == 0:
        w.close()
    world.barrier()
def par_save(self, filename, name, A_sS):
    from gpaw.io import open
    nS = self.nS

    if rank == 0:
        w = open(filename, 'w', world)
        w.dimension('nS', nS)
        if name == 'v_SS':
            w.add('w_S', ('nS',), dtype=self.w_S.dtype)
            w.fill(self.w_S)
        w.add('rhoG0_S', ('nS',), dtype=complex)
        w.fill(self.rhoG0_S)
        w.add(name, ('nS', 'nS'), dtype=complex)
        tmp = np.zeros_like(A_sS)

    # Assumes that H_SS is written in order from rank 0 to rank N.
    for irank in range(size):
        if irank == 0:
            if rank == 0:
                w.fill(A_sS)
        else:
            if rank == irank:
                world.send(A_sS, 0, irank + 100)
            if rank == 0:
                world.receive(tmp, irank, irank + 100)
                w.fill(tmp)
    if rank == 0:
        w.close()
    world.barrier()
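# A minimal, self-contained sketch (an assumption, not GPAW code) of the
# gather-and-write pattern shared by par_write and par_save above: every
# non-root rank sends its block to rank 0 with a rank-derived tag, and rank 0
# receives the blocks in rank order, which is where the real code calls
# w.fill().  Run under MPI, e.g. with gpaw-python.
import numpy as np
from gpaw.mpi import rank, size, world

n = 4  # rows per rank; illustrative only
block = np.full((n, n), rank, dtype=complex)

if rank == 0:
    full = np.empty((size, n, n), dtype=complex)
    full[0] = block
    tmp = np.zeros_like(block)
    for irank in range(1, size):
        world.receive(tmp, irank, irank + 100)  # tag matches the send below
        full[irank] = tmp  # the real code writes: w.fill(tmp)
else:
    world.send(block, 0, rank + 100)
world.barrier()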
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if self.calc.wfs.kpt_comm.size != world.size or world.size == 1:
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)
            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)
            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)
            return np.complex128(psit_G)
    else:
        if self.nkpt % size != 0:
            raise ValueError('The number of k-points must be divisible by '
                             'the number of CPUs when wavefunctions are '
                             'not dumped to disk!')
        # Support ground-state calculations with k-point parallelization only.
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = rank
        klist = np.array([kpt_rank, u, bzkpt_rank, n])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)
        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)
        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)
        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)
        self.wScomm.broadcast(psit_G, 0)
        return psit_G
def get_wavefunction(self, ibzk, n, check_focc=True, spin=0):
    if (self.calc.wfs.world.size == 1 or
        self.calc.wfs.gd.comm.size != 1 or
        self.calc.input_parameters['mode'] == 'lcao'):
        if not check_focc:
            return
        else:
            psit_G = self.calc.wfs.get_wave_function_array(n, ibzk, spin)
            if self.calc.wfs.world.size == 1:
                return np.complex128(psit_G)
            if self.calc.wfs.world.rank != 0:
                psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype,
                                                global_array=True)
            self.calc.wfs.world.broadcast(psit_G, 0)
            return np.complex128(psit_G)
    else:
        # Support ground-state calculations with k-point and band
        # parallelization, but domain decomposition must be 1.
        kpt_rank, u = self.calc.wfs.kd.get_rank_and_index(0, ibzk)
        bzkpt_rank = self.kcomm.rank
        band_rank, myn = self.calc.wfs.bd.who_has(n)
        assert self.calc.wfs.gd.comm.size == 1
        world_rank = kpt_rank * self.calc.wfs.band_comm.size + band_rank
        # In the following, kpt_rank is assigned to world_rank.
        klist = np.array([world_rank, u, bzkpt_rank, myn])
        klist_kcomm = np.zeros((self.kcomm.size, 4), dtype=int)
        self.kcomm.all_gather(klist, klist_kcomm)
        check_focc_global = np.zeros(self.kcomm.size, dtype=bool)
        self.kcomm.all_gather(np.array([check_focc]), check_focc_global)
        psit_G = self.calc.wfs.gd.empty(dtype=self.calc.wfs.dtype)
        for i in range(self.kcomm.size):
            if check_focc_global[i]:
                kpt_rank, u, bzkpt_rank, nlocal = klist_kcomm[i]
                if kpt_rank == bzkpt_rank:
                    if rank == kpt_rank:
                        psit_G = self.calc.wfs.kpt_u[u].psit_nG[nlocal]
                else:
                    if rank == kpt_rank:
                        world.send(self.calc.wfs.kpt_u[u].psit_nG[nlocal],
                                   bzkpt_rank, 1300 + bzkpt_rank)
                    if rank == bzkpt_rank:
                        psit_G = self.calc.wfs.gd.empty(
                            dtype=self.calc.wfs.dtype)
                        world.receive(psit_G, kpt_rank, 1300 + bzkpt_rank)
        self.wScomm.broadcast(psit_G, 0)
        return psit_G
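# A minimal sketch (hypothetical ranks, not GPAW code) of the
# owner-to-requester handoff inside get_wavefunction above: the rank that
# stores the array sends it, the rank that needs it allocates an empty buffer
# of matching shape and dtype and receives into it, and both sides agree on
# the tag (here 1300 + requester, mirroring 1300 + bzkpt_rank).
import numpy as np
from gpaw.mpi import world

owner = 0
requester = world.size - 1
tag = 1300 + requester

psit = None
if world.rank == owner:
    psit = np.arange(8, dtype=complex)  # stand-in for psit_nG[nlocal]
    if owner != requester:
        world.send(psit, requester, tag)
elif world.rank == requester:
    psit = np.empty(8, dtype=complex)
    world.receive(psit, owner, tag)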
def update_references(self, kpt_u, rank_a):
    requests = []
    kpt_comm, band_comm, domain_comm = (self.kd_old.comm, self.bd.comm,
                                        self.gd.comm)
    for u in range(self.kd_old.nks):
        kpt_rank, myu = self.kd_old.who_has(u)
        for n in range(self.bd.nbands):
            band_rank, myn = self.bd.who_has(n)
            for a in range(self.natoms):
                domain_rank = rank_a[a]
                if (kpt_comm.rank == kpt_rank and
                    band_comm.rank == band_rank and
                    domain_comm.rank == domain_rank):
                    kpt = kpt_u[myu]
                    chk = md5_array(kpt.P_ani[a][myn], numeric=True)
                    if world.rank == 0:
                        self.chk_una[u, n, a] = chk
                    else:
                        requests.append(
                            world.send(np.array([chk], dtype=np.int64),
                                       0, 1303 + a, block=False))
                elif world.rank == 0:
                    world_rank = (rank_a[a] +
                                  band_rank * domain_comm.size +
                                  kpt_rank * domain_comm.size *
                                  band_comm.size)
                    chk = self.chk_una[u, n, a:a + 1]  # XXX hack to get pointer
                    requests.append(world.receive(chk, world_rank,
                                                  1303 + a, block=False))
    world.waitall(requests)
    world.broadcast(self.chk_una, 0)
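# A minimal sketch (assumed layout, not GPAW code) of the non-blocking
# pattern used by update_references above: with block=False, send and receive
# return immediately with a request object, and world.waitall() blocks until
# every posted transfer has finished.  Buffers must not be reused or freed
# before waitall returns.
import numpy as np
from gpaw.mpi import world

requests = []
if world.rank == 0:
    chk_r = np.zeros(world.size, dtype=np.int64)
    for r in range(1, world.size):
        # Receive into a length-1 view so the value lands in chk_r[r],
        # the same trick as the a:a + 1 slice above.
        requests.append(world.receive(chk_r[r:r + 1], r, 1303, block=False))
else:
    chk = np.array([world.rank], dtype=np.int64)
    requests.append(world.send(chk, 0, 1303, block=False))
world.waitall(requests)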
def collect_A_SS(self, A_sS):
    if world.rank == 0:
        A_SS = np.zeros((self.nS, self.nS), dtype=complex)
        A_SS[:len(A_sS)] = A_sS
        Ntot = len(A_sS)
        for rank in range(1, world.size):
            nkr, nk, ns = self.parallelisation_sizes(rank)
            buf = np.empty((ns, self.nS), dtype=complex)
            world.receive(buf, rank, tag=123)
            A_SS[Ntot:Ntot + ns] = buf
            Ntot += ns
    else:
        world.send(A_sS, 0, tag=123)
    world.barrier()
    if world.rank == 0:
        return A_SS
def distribute_A_SS(self, A_SS, transpose=False):
    if world.rank == 0:
        for rank in range(world.size):
            nkr, nk, ns = self.parallelisation_sizes(rank)
            if rank == 0:
                A_sS = A_SS[0:ns]
                Ntot = ns
            else:
                world.send(A_SS[Ntot:Ntot + ns], rank, tag=123)
                Ntot += ns
    else:
        nkr, nk, ns = self.parallelisation_sizes()
        A_sS = np.empty((ns, self.nS), dtype=complex)
        world.receive(A_sS, 0, tag=123)
    world.barrier()
    if transpose:
        A_sS = A_sS.T
    return A_sS
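# A minimal round-trip sketch (sizes are illustrative assumptions) of the
# collect/distribute pair above, for the case where every rank knows each
# rank's row count: rank 0 stacks the row blocks into the full matrix in
# rank order, and could later slice them back out for distribution.
import numpy as np
from gpaw.mpi import world

ns = 2                  # rows owned by each rank (uniform here)
nS = ns * world.size
A_sS = np.full((ns, nS), world.rank, dtype=complex)

if world.rank == 0:
    A_SS = np.empty((nS, nS), dtype=complex)
    A_SS[:ns] = A_sS
    for r in range(1, world.size):
        # Row blocks of a C-contiguous array are contiguous, so we can
        # receive straight into the destination slice.
        world.receive(A_SS[r * ns:(r + 1) * ns], r, tag=123)
else:
    world.send(A_sS, 0, tag=123)
world.barrier()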
cell.shape = (3, 3)
positions.shape = (natoms, 3)
if first_time:
    atomic_symbols = ''.join([ase.chemical_symbols[int(i)]
                              for i in atomic_numbers])
    atoms = ase.Atoms(atomic_symbols, positions=positions, cell=cell,
                      pbc=pbc)
    calc = create_gpaw(my_comm)
    atoms.set_calculator(calc)
else:
    atoms.set_positions(positions)

calculation_failed = numpy.array((0,), 'i')
try:
    f1 = atoms.get_forces()
    e1 = atoms.get_potential_energy()
    e1 = numpy.array([e1])
except gpaw.KohnShamConvergenceError:
    calculation_failed = numpy.array((1,), 'i')

# Tell the client whether the calculation converged; only on success do
# the energy and forces follow on the same tag.
if my_comm.rank == 0:
    world.send(calculation_failed, my_client_rank, tag=0)
    if not calculation_failed:
        world.send(e1, my_client_rank, tag=0)
        world.send(f1, my_client_rank, tag=0)
if first_time:
    atomic_symbols = ''.join([ase.chemical_symbols[int(i)]
                              for i in atomic_numbers])
    atoms = ase.Atoms(atomic_symbols, positions=positions, cell=cell,
                      pbc=pbc)
    calc = create_gpaw(my_comm)
    atoms.set_calculator(calc)
    first_time = False
else:
    atoms.set_positions(positions)

logfile = os.path.join(logdir, "gpaw_%i.txt" % nforce_calls)
calc.set(txt=logfile)

calculation_failed = numpy.array((0,), 'i')
try:
    f1 = atoms.get_forces()
    e1 = atoms.get_potential_energy()
    e1 = numpy.array([e1])
except gpaw.KohnShamConvergenceError:
    calculation_failed = numpy.array((1,), 'i')

t1 = time.time()
if my_comm.rank == 0:
    performance_log = os.path.join(logdir, "performance.txt")
    fperformance = open(performance_log, "a+")
    fperformance.write("%i %.3f\n" % (nforce_calls, (t1 - t0)))
    fperformance.close()
    world.send(calculation_failed, my_client_rank, tag=0)
    if not calculation_failed:
        world.send(e1, my_client_rank, tag=0)
        world.send(f1, my_client_rank, tag=0)
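# A minimal two-rank sketch (the server/client roles are assumptions) of the
# status-first protocol above: the server always sends the failure flag
# first, and only on success sends the energy and forces, so the client
# knows whether to post the matching receives.
import numpy as np
from gpaw.mpi import world

if world.size > 1:
    server, client = 1, 0
    failed = np.array((0,), 'i')
    e1 = np.array([-1.0])
    f1 = np.zeros((2, 3))
    if world.rank == server:
        world.send(failed, client, tag=0)
        if not failed:
            world.send(e1, client, tag=0)
            world.send(f1, client, tag=0)
    elif world.rank == client:
        world.receive(failed, server, tag=0)
        if not failed:
            world.receive(e1, server, tag=0)
            world.receive(f1, server, tag=0)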
def get_phi_qaGp(self):
    # Find the largest plane-wave and projector dimensions over all atoms.
    N1_max = 0
    N2_max = 0
    natoms = len(self.calc.wfs.setups)
    for id in range(natoms):
        N1 = self.npw
        N2 = self.calc.wfs.setups[id].ni**2
        if N1 > N1_max:
            N1_max = N1
        if N2 > N2_max:
            N2_max = N2

    nbzq = self.kd.nbzkpts
    nbzq, nq_local, q_start, q_end = parallel_partition(
        nbzq, world.rank, world.size, reshape=False)
    phimax_qaGp = np.zeros((nq_local, natoms, N1_max, N2_max), dtype=complex)
    # phimax_qaGp = np.zeros((nbzq, natoms, N1_max, N2_max), dtype=complex)

    t0 = time()
    for iq in range(nq_local):
        q_c = self.bzq_qc[iq + q_start]
        tmp_aGp = self.get_phi_aGp(q_c, parallel=False)
        for id in range(natoms):
            N1, N2 = tmp_aGp[id].shape
            phimax_qaGp[iq, id, :N1, :N2] = tmp_aGp[id]
        self.timing(iq * world.size, t0, nq_local, 'iq')
    world.barrier()

    # Write to disk.
    filename = 'phi_qaGp'
    if world.rank == 0:
        w = Writer(filename)
        w.dimension('nbzq', nbzq)
        w.dimension('natoms', natoms)
        w.dimension('nG', N1_max)
        w.dimension('nii', N2_max)
        w.add('phi_qaGp', ('nbzq', 'natoms', 'nG', 'nii'), dtype=complex)

    for q in range(nbzq):
        # Recompute which rank owns global index q under the block
        # partition used by parallel_partition above.
        residual = nbzq % size
        N_local = nbzq // size
        if q < residual * (N_local + 1):
            qrank = q // (N_local + 1)
        else:
            qrank = (q - residual * (N_local + 1)) // N_local + residual
        if qrank == 0:
            if world.rank == 0:
                phi_aGp = phimax_qaGp[q - q_start]
        else:
            if world.rank == qrank:
                phi_aGp = phimax_qaGp[q - q_start]
                world.send(phi_aGp, 0, q)
            elif world.rank == 0:
                # Rank 0 reuses its last phi_aGp buffer as receive storage;
                # its own slices have already been written at this point.
                world.receive(phi_aGp, qrank, q)
        if world.rank == 0:
            w.fill(phi_aGp)
    world.barrier()
    if world.rank == 0:
        w.close()
    return
def get_phi_qaGp(self):
    N1_max = 0
    N2_max = 0
    natoms = len(self.calc.wfs.setups)
    for id in range(natoms):
        N1 = self.npw
        N2 = self.calc.wfs.setups[id].ni**2
        if N1 > N1_max:
            N1_max = N1
        if N2 > N2_max:
            N2_max = N2

    nbzq = self.kd.nbzkpts
    nbzq, nq_local, q_start, q_end = parallel_partition(
        nbzq, world.rank, world.size, reshape=False)
    phimax_qaGp = np.zeros((nq_local, natoms, N1_max, N2_max), dtype=complex)
    # phimax_qaGp = np.zeros((nbzq, natoms, N1_max, N2_max), dtype=complex)

    t0 = time()
    for iq in range(nq_local):
        q_c = self.bzq_qc[iq + q_start]
        tmp_aGp = self.get_phi_aGp(q_c, parallel=False)
        for id in range(natoms):
            N1, N2 = tmp_aGp[id].shape
            phimax_qaGp[iq, id, :N1, :N2] = tmp_aGp[id]
        self.timing(iq * world.size, t0, nq_local, 'iq')
    world.barrier()

    # Write to disk.
    filename = 'phi_qaGp'
    if world.rank == 0:
        w = Writer(filename)
        w.dimension('nbzq', nbzq)
        w.dimension('natoms', natoms)
        w.dimension('nG', N1_max)
        w.dimension('nii', N2_max)
        w.add('phi_qaGp', ('nbzq', 'natoms', 'nG', 'nii'), dtype=complex)

    for q in range(nbzq):
        residual = nbzq % size
        N_local = nbzq // size
        if q < residual * (N_local + 1):
            qrank = q // (N_local + 1)
        else:
            qrank = (q - residual * (N_local + 1)) // N_local + residual
        if qrank == 0:
            if world.rank == 0:
                phi_aGp = phimax_qaGp[q - q_start]
        else:
            if world.rank == qrank:
                phi_aGp = phimax_qaGp[q - q_start]
                world.send(phi_aGp, 0, q)
            elif world.rank == 0:
                world.receive(phi_aGp, qrank, q)
        if world.rank == 0:
            w.fill(phi_aGp)
    if world.rank == 0:
        w.close()
    world.barrier()
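# The owner computation inside get_phi_qaGp, isolated as a plain function
# (the name owner_rank is ours): with nbzq items block-partitioned over
# size ranks so that the first nbzq % size ranks hold one extra item, this
# maps a global index q to the rank that stores it.
def owner_rank(q, nbzq, size):
    residual = nbzq % size
    N_local = nbzq // size
    if q < residual * (N_local + 1):
        return q // (N_local + 1)
    return (q - residual * (N_local + 1)) // N_local + residual

# 10 items on 4 ranks split as (3, 3, 2, 2):
assert [owner_rank(q, 10, 4) for q in range(10)] == [0, 0, 0, 1, 1, 1,
                                                     2, 2, 3, 3]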