def testHandleValue(self):
    """Check that the integer stored at each object's address equals its handle.

    For every entry of ``self.objects`` the memory at ``MPI._addressof(obj)``
    is read as an unsigned integer whose width is ``MPI._sizeof(obj)`` and
    compared with ``MPI._handleof(obj)``.
    """
    # Map a byte width to the unsigned ctypes integer of that width.
    unsigned_by_size = {
        ctypes.sizeof(ctype): ctype
        for ctype in (ctypes.c_uint32, ctypes.c_uint64)
    }
    for mpi_obj in self.objects:
        handle_type = unsigned_by_size[MPI._sizeof(mpi_obj)]
        raw_handle = handle_type.from_address(MPI._addressof(mpi_obj))
        self.assertEqual(raw_handle.value, MPI._handleof(mpi_obj))
def testAHandleOf(self):
    """Call ``MPI._handleof`` on every object in ``self.objects``.

    ``MPI.Status`` instances are expected to raise ``NotImplementedError``;
    for every other object the call only has to succeed.
    """
    for mpi_obj in self.objects:
        if not isinstance(mpi_obj, MPI.Status):
            # The returned handle is not inspected; the call must not raise.
            MPI._handleof(mpi_obj)
        else:
            self.assertRaises(NotImplementedError, MPI._handleof, mpi_obj)
def testHandleValue(self):
    """Check, through cffi, that the value stored at each object's address
    equals ``MPI._handleof`` for that object.
    """
    ffi = cffi.FFI()
    # Map a byte width to the name of the unsigned C integer of that width.
    uint_name_by_size = {
        ffi.sizeof(c_name): c_name
        for c_name in ('uint32_t', 'uint64_t')
    }
    for mpi_obj in self.objects:
        pointer_type = uint_name_by_size[MPI._sizeof(mpi_obj)] + '*'
        raw_handle = ffi.cast(pointer_type, MPI._addressof(mpi_obj))[0]
        self.assertEqual(raw_handle, MPI._handleof(mpi_obj))
def ncmpi_open(name):
    """Open ``name`` with ``_ncmpi_open`` and return the resulting id.

    The call is made with ``MPI.COMM_WORLD``, the ``NC_NOWRITE`` mode and
    ``MPI.INFO_NULL``.  The return code is passed to ``errcheck`` before the
    integer id written by the library is returned.
    """
    ncid = c_int()
    world_comm = MPI_Comm.from_address(MPI._addressof(MPI.COMM_WORLD))
    # NOTE(review): the info handle is wrapped with MPI_Comm, not an info
    # type -- harmless only if both handles have the same ctypes layout;
    # confirm against the library's argtypes.
    null_info = MPI_Comm.from_address(MPI._addressof(MPI.INFO_NULL))
    status = _ncmpi_open(world_comm, name, NC_NOWRITE, null_info, byref(ncid))
    errcheck(status)
    return ncid.value
def testHandleAdress(self):
    """Copy each object's raw handle into a fresh instance and compare them.

    A new object of the same type is created, the handle value found at the
    original's address is written to the new object's address, and the two
    objects are then expected to compare equal.
    """
    # Map a byte width to the ctypes type used to view a handle of that width.
    handle_type_by_size = {
        ctypes.sizeof(ctype): ctype
        for ctype in (ctypes.c_int, ctypes.c_void_p)
    }
    for original in self.objects:
        handle_t = handle_type_by_size[MPI._sizeof(original)]
        clone = type(original)()
        source_handle = handle_t.from_address(MPI._addressof(original))
        target_handle = handle_t.from_address(MPI._addressof(clone))
        target_handle.value = source_handle.value
        self.assertEqual(original, clone)
def check_mpi():
    """Verify that mpi4py and the ``mpiexec`` on the PATH belong together.

    Three checks are made, each raising ``ImportError`` on failure:

    1. the directory of ``mpiexec`` must appear in the path of every MPI
       compiler wrapper reported by ``mpi4py.get_config()``;
    2. ``MPI.get_vendor()`` must report ``'Open MPI'``;
    3. the dotted vendor version must appear in the ``mpiexec`` directory.

    Raises:
        ImportError: if ``mpiexec`` cannot be found or any check fails.
    """
    mpiexec = distutils.spawn.find_executable("mpiexec")
    if mpiexec is None:
        # find_executable returns None when nothing is found; without this
        # guard os.path.split(None) fails with an unhelpful TypeError.
        raise ImportError("Could not find an 'mpiexec' executable on the PATH.")
    mpiexec_path, _ = os.path.split(mpiexec)

    compiler_wrappers = ('mpicc', 'mpicxx', 'mpif77', 'mpif90', 'mpifort')
    mpi4py_config = mpi4py.get_config()
    for executable, path in mpi4py_config.items():
        if executable not in compiler_wrappers:
            continue
        if mpiexec_path not in path:
            raise ImportError("mpi4py may not be configured against the same version of 'mpiexec' that you are using. The 'mpiexec' path is {mpiexec_path} and mpi4py.get_config() returns:\n{mpi4py_config}\n".format(mpiexec_path=mpiexec_path, mpi4py_config=mpi4py_config))

    vendor = MPI.get_vendor()
    if 'Open MPI' not in vendor:
        raise ImportError("mpi4py must have been installed against Open MPI in order for StructOpt to function correctly.")

    vendor_number = ".".join(str(x) for x in vendor[1])
    if vendor_number not in mpiexec_path:
        raise ImportError("The MPI version that mpi4py was compiled against does not match the version of 'mpiexec'. mpi4py's version number is {}, and mpiexec's path is {}".format(vendor, mpiexec_path))
def ncmpi_open(name):
    """Open ``name`` with ``_ncmpi_open`` and return the resulting id.

    On Python 3 a text ``name`` is encoded to UTF-8 bytes before being handed
    to the C library; a ``name`` that is already ``bytes`` is passed through
    unchanged.  The call uses ``MPI.COMM_WORLD``, the ``NC_NOWRITE`` mode and
    ``MPI.INFO_NULL``; the return code is passed to ``errcheck``.

    Returns:
        The integer id written by ``_ncmpi_open``.
    """
    if sys.version_info >= (3, 0, 0) and not isinstance(name, bytes):
        # bytes(b"...", 'utf-8') raises TypeError, so only encode text.
        name = bytes(name, 'utf-8')

    comm_ptr = MPI._addressof(MPI.COMM_WORLD)
    comm_val = MPI_Comm.from_address(comm_ptr)
    info_ptr = MPI._addressof(MPI.INFO_NULL)
    info_val = MPI_Info.from_address(info_ptr)

    ncid = c_int()
    retval = _ncmpi_open(comm_val, name, NC_NOWRITE, info_val, byref(ncid))
    errcheck(retval)
    return ncid.value
def main(split_into=2, nloops=3):
    """Spawn ``split_into`` groups of workers ``nloops`` times.

    Builds one tuple of string arguments per group (a multiplier, plus a
    color when the vendor is not Open MPI), prints a summary on rank 0 and
    then calls ``spawn_multiple`` once per iteration.

    Raises:
        ValueError: if the world size is smaller than ``split_into``.
    """
    comm_world = MPI.COMM_WORLD
    rank = comm_world.Get_rank()
    size = comm_world.Get_size()
    if size < split_into:
        raise ValueError("The number of cores passed to 'mpiexec' must be greater than the number of desired communicators.")
    cores_per_comm = size // split_into

    # Create fake data for input for each of the different processes we will spawn
    multipliers = [index + 1 for index in range(split_into)]
    if 'Open MPI' in MPI.get_vendor():
        data_by_process = [(str(multiplier),) for multiplier in multipliers]
    else:
        colors = [(index + 1) // split_into for index in range(split_into)]
        data_by_process = [
            (str(multiplier), str(color))
            for multiplier, color in zip(multipliers, colors)
        ]

    if rank == 0:
        print("At each iteration we will spawn {} workers with {} cores each out of a total of {} cores.".format(split_into, cores_per_comm, size))
        print("Those {} split communicators will get the following as input:".format(split_into))
        for index, payload in enumerate(data_by_process):
            print(" Communicator {}: {}".format(index, payload))

    # NOTE(review): assumed to run on every rank (not only rank 0) -- confirm
    # against the original indentation.
    for iteration in range(nloops):
        print("Iteration {}...".format(iteration))
        spawn_multiple(split_into, cores_per_comm, data_by_process)
def getlibraryinfo():
    """Return a string such as ``"MPI 3.1 (Open MPI 4.0.1)"``.

    The vendor part in parentheses is omitted when ``MPI.get_vendor()``
    reports the name ``"unknown"``.
    """
    from mpi4py import MPI

    summary = "MPI %d.%d" % MPI.Get_version()
    vendor_name, vendor_version = MPI.get_vendor()
    if vendor_name == "unknown":
        return summary
    formatted_version = '%d.%d.%d' % vendor_version
    return summary + (" (%s %s)" % (vendor_name, formatted_version))
def __init__(self, comm=None):
    """Store in ``self.value`` a ``self.dtype`` view of ``comm``'s handle.

    ``comm`` defaults to ``MPI.COMM_WORLD`` when it is ``None``.
    """
    # Should only end up with comm=None upon unpickling
    target_comm = MPI.COMM_WORLD if comm is None else comm
    self.value = self.dtype.from_address(MPI._addressof(target_comm))
def is_mpd_running():
    """Report whether an ``mpd`` daemon appears to be running.

    Only the ``'MPICH2'`` vendor is checked, by running ``mpdtrace``; a
    return code of 255 is taken to mean that no daemon is running.  For any
    other vendor, or when ``mpdtrace`` cannot be started at all, ``True`` is
    returned.

    Returns:
        bool: ``False`` only when ``mpdtrace`` exits with code 255.
    """
    name_of_the_vendor, _ = MPI.get_vendor()
    if name_of_the_vendor != 'MPICH2':
        return True
    try:
        process = subprocess.Popen(
            ['mpdtrace'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Drain both pipes so the child cannot block; the output is unused.
        process.communicate()
    except OSError:
        # mpdtrace is not installed or not executable: nothing to check.
        return True
    return process.returncode != 255
def setup_md(self, icomm_grid, xyzL, xyz_orig):
    """Forward the MD setup call to ``self.py_setup_md``.

    ``icomm_grid`` is converted to its handle with ``MPI._handleof`` before
    being passed on; ``xyzL`` and ``xyz_orig`` are forwarded unchanged.
    """
    grid_handle = MPI._handleof(icomm_grid)
    self.py_setup_md(grid_handle, xyzL, xyz_orig)
def ensure_mpd_is_running():
    """Try to start ``mpd`` when ``is_mpd_running()`` reports it is not running.

    The daemon is only started for the ``"MPICH2"`` vendor.  Failure to
    launch it is ignored.
    """
    if is_mpd_running():
        return
    vendor_name, _ = MPI.get_vendor()
    if vendor_name != "MPICH2":
        return
    try:
        subprocess.Popen(["nohup", "mpd"], close_fds=True)
    except OSError:
        # Best effort: an OSError while launching mpd is deliberately ignored.
        pass
def _buffer_from_gpuarray(self, array):
    """Return a buffer object covering ``array.nbytes`` bytes of ``array.gpudata``."""
    gpudata = array.gpudata
    # gpudata might be an `int` or a `DeviceAllocation`
    if not isinstance(gpudata, cuda.DeviceAllocation):
        # construct the buffer from the raw value
        return MPI.make_buffer(array.gpudata, array.nbytes)
    return gpudata.as_buffer(array.nbytes)
def send(data, data_package, dest=None, gpu_direct=True):
    """Send ``data_package`` and then, non-blockingly, ``data`` to ``dest``.

    The package is sent first with ``send_data_package`` (tag 52); the
    payload follows with ``comm.Isend`` (tag 57):

    * ``memory_type == 'devptr'`` and ``gpu_direct``: the device pointer is
      wrapped with ``MPI.make_buffer`` and sent as bytes;
    * ``memory_type == 'devptr'`` without ``gpu_direct``: the data is first
      copied into a new numpy array with ``cuda.memcpy_dtoh_async``;
    * otherwise ``data`` is sent as-is, but only when
      ``data_package.data_dtype`` is ``numpy.ndarray``.

    Each request is appended to the global ``s_requests`` together with the
    objects backing it.  When ``VIVALDI_BLOCKING`` is set the request is
    waited on immediately.  When ``log_type`` is ``'time'`` or ``'all'`` the
    elapsed time and bandwidth are logged.

    Returns:
        The request returned by ``comm.Isend``, or ``None`` if nothing was sent.
    """
    global s_requests
    tag = 52
    dp = data_package

    # send data_package
    send_data_package(dp, dest=dest, tag=tag)

    nbytes = dp.data_bytes
    memory_type = dp.memory_type
    timing_enabled = log_type in ['time', 'all']

    if timing_enabled:
        # Start and stop must come from the same clock: the original started
        # with time.time() but stopped with MPI.Wtime().
        st = MPI.Wtime()

    flag = False
    request = None
    if memory_type == 'devptr':  # data in the GPU
        if gpu_direct:  # want to use GPU direct
            devptr = data
            buf = MPI.make_buffer(devptr.__int__(), nbytes)
            ctx.synchronize()
            request = comm.Isend([buf, MPI.BYTE], dest=dest, tag=57)
            if VIVALDI_BLOCKING:
                MPI.Request.Wait(request)
            s_requests.append((request, buf, devptr))
            flag = True
        else:  # not want to use GPU direct
            # copy to CPU
            shape = dp.data_memory_shape
            dtype = dp.data_contents_memory_dtype
            buf = numpy.empty(shape, dtype=dtype)
            cuda.memcpy_dtoh_async(buf, data, stream=stream_list[1])
            request = comm.Isend(buf, dest=dest, tag=57)
            if VIVALDI_BLOCKING:
                MPI.Request.Wait(request)
            s_requests.append((request, buf, None))
    else:  # data in the CPU
        # GPU direct does not apply to host data
        if dp.data_dtype == numpy.ndarray:
            request = comm.Isend(data, dest=dest, tag=57)
            if VIVALDI_BLOCKING:
                MPI.Request.Wait(request)
            s_requests.append((request, data, None))

    if timing_enabled:
        u = dp.unique_id
        nbytes = dp.data_bytes
        t = MPI.Wtime() - st
        ms = 1000*t
        # Guard against a zero reading from a coarse timer.
        bw = nbytes/GIGA/t if t > 0 else float('inf')
        if flag:
            log("rank%d, \"%s\", u=%d, from rank%d to rank%d GPU direct send, Bytes: %dMB, time: %.3f ms, speed: %.3f GByte/sec"%(rank, name, u, rank, dest, nbytes/MEGA, ms, bw),'time', log_type)
        else:
            log("rank%d, \"%s\", u=%d, from rank%d to rank%d MPI data transfer, Bytes: %dMB, time: %.3f ms, speed: %.3f GByte/sec"%(rank, name, u, rank, dest, nbytes/MEGA, ms, bw),'time', log_type)
    return request
def set_default_mpi_parameters(parameters):
    """Fill in MPI-related defaults on ``parameters`` and return it.

    Every module under ``parameters.relaxations`` and ``parameters.fitnesses``
    (when present) gets ``use_mpi4py=False`` and ``MPMD=0`` as defaults.
    ``parameters.mpi`` is created if missing, and ``rank`` / ``ncores`` are
    set from ``MPI.COMM_WORLD`` after checking that mpi4py matches the
    ``mpiexec`` found on the PATH.

    Raises:
        ImportError: if mpi4py is missing, ``mpiexec`` cannot be found, or
            mpi4py does not appear to match the ``mpiexec`` in use.
    """
    # If mpi4py is used, make sure we can import it and set the rank/size for
    # all cores in the parameters.mpi
    use_mpi4py = True
    for section in ('relaxations', 'fitnesses'):
        if section not in parameters:
            continue
        modules = getattr(parameters, section)
        for module in modules:
            modules[module].setdefault('use_mpi4py', False)
            modules[module].setdefault('MPMD', 0)
            if modules[module].use_mpi4py:
                use_mpi4py = True

    parameters.setdefault('mpi', {})

    if use_mpi4py:
        try:
            import mpi4py
        except ImportError:
            raise ImportError("mpi4py must be installed to use StructOpt.")

        mpiexec = distutils.spawn.find_executable("mpiexec")
        if mpiexec is None:
            # find_executable returns None when nothing is found; without
            # this guard os.path.split(None) fails with an opaque TypeError.
            raise ImportError("Could not find an 'mpiexec' executable on the PATH.")
        mpiexec_path, _ = os.path.split(mpiexec)

        mpi4py_config = mpi4py.get_config()
        for executable, path in mpi4py_config.items():
            if executable not in ['mpicc', 'mpicxx', 'mpif77', 'mpif90', 'mpifort']:
                continue
            if mpiexec_path not in path:
                raise ImportError("mpi4py may not be configured against the same version of 'mpiexec' that you are using. The 'mpiexec' path is {mpiexec_path} and mpi4py.get_config() returns:\n{mpi4py_config}\n".format(mpiexec_path=mpiexec_path, mpi4py_config=mpi4py_config))

        from mpi4py import MPI
        vendor = MPI.get_vendor()
        if 'Open MPI' not in vendor:
            raise ImportError("mpi4py must have been installed against Open MPI in order for StructOpt to function correctly.")

        vendor_number = ".".join(str(x) for x in vendor[1])
        if vendor_number not in mpiexec_path:
            raise ImportError("The MPI version that mpi4py was compiled against does not match the version of 'mpiexec'. mpi4py's version number is {}, and mpiexec's path is {}".format(vendor, mpiexec_path))

        parameters.mpi.rank = MPI.COMM_WORLD.Get_rank()
        parameters.mpi.ncores = MPI.COMM_WORLD.Get_size()
    else:
        # Serial fallback; unreachable while use_mpi4py starts out True.
        parameters.mpi.rank = 0
        parameters.mpi.ncores = 1

    return parameters
def testHandleAddress(self):
    """Copy each object's raw handle into a fresh instance through cffi.

    A C typedef named ``MPI_<TypeName>`` is declared for every entry of
    ``self.mpitypes``.  For each object the handle at its address is then
    copied into a newly created object of the same type, and the two are
    expected to compare equal.
    """
    ffi = cffi.FFI()
    c_type_by_size = {
        ffi.sizeof(c_name): c_name
        for c_name in ('int', 'void*')
    }

    def short_name(cls):
        # Keep only the part of the class name after the last dot.
        return cls.__name__.rsplit('.', 1)[-1]

    for mpi_type in self.mpitypes:
        underlying = c_type_by_size[MPI._sizeof(mpi_type)]
        alias = 'MPI_' + short_name(mpi_type)
        ffi.cdef("typedef %s %s;" % (underlying, alias))

    for original in self.objects:
        # Every communicator, whatever its class, uses the MPI_Comm typedef.
        if isinstance(original, MPI.Comm):
            alias = 'MPI_Comm'
        else:
            alias = 'MPI_' + short_name(type(original))
        pointer_type = alias + '*'
        clone = type(original)()
        source_handle = ffi.cast(pointer_type, MPI._addressof(original))
        target_handle = ffi.cast(pointer_type, MPI._addressof(clone))
        target_handle[0] = source_handle[0]
        self.assertEqual(original, clone)
def init(self, calling_realm):
    """Call ``self._py_init`` and wrap the communicator it returns.

    ``_py_init`` writes an integer communicator handle into a ``c_int``.
    That value is stored into the handle of a fresh ``MPI.Intracomm``, which
    is returned.
    """
    # View the mpi4py handle as an int when the sizes match, else as a pointer.
    int_sized = MPI._sizeof(MPI.Comm) == ctypes.sizeof(c_int)
    handle_type = c_int if int_sized else c_void_p

    # Ask the library to create the realm communicator.
    realm_comm_out = c_int()
    self._py_init(calling_realm, byref(realm_comm_out))

    # Use an intracomm object as the template and override its handle value.
    wrapped_comm = MPI.Intracomm()
    handle_view = handle_type.from_address(MPI._addressof(wrapped_comm))
    handle_view.value = realm_comm_out.value
    return wrapped_comm
def testGetEnvelope(self):
    """Check that every datatype in ``datatypes`` has a "named" envelope.

    The test returns silently as soon as ``Get_envelope`` raises
    ``NotImplementedError``.  Datatypes whose name contains ``"COMPLEX"`` are
    skipped when the vendor is LAM/MPI.
    """
    for datatype in datatypes:
        try:
            envelope = datatype.Get_envelope()
        except NotImplementedError:
            return
        is_complex = "COMPLEX" in datatype.name
        if is_complex and MPI.get_vendor()[0] == 'LAM/MPI':
            continue
        num_ints, num_addrs, num_types, combiner = envelope
        self.assertEqual(combiner, MPI.COMBINER_NAMED)
        for count in (num_ints, num_addrs, num_types):
            self.assertEqual(count, 0)
def testGetEnvelope(self):
    """Check envelope, combiner and decoding of every datatype in ``datatypes``.

    The test is skipped when ``Get_envelope`` raises ``NotImplementedError``.
    Datatypes whose name contains ``"COMPLEX"`` are skipped when the vendor
    is LAM/MPI.
    """
    for datatype in datatypes:
        try:
            envelope = datatype.Get_envelope()
        except NotImplementedError:
            self.skipTest('mpi-type-get_envelope')
        is_complex = "COMPLEX" in datatype.name
        if is_complex and MPI.get_vendor()[0] == 'LAM/MPI':
            continue

        num_ints, num_addrs, num_types, combiner = envelope
        self.assertEqual(combiner, MPI.COMBINER_NAMED)
        for count in (num_ints, num_addrs, num_types):
            self.assertEqual(count, 0)

        # The convenience attributes must agree with Get_envelope().
        self.assertEqual(datatype.envelope, envelope)
        self.assertEqual(datatype.combiner, combiner)
        self.assertTrue(datatype.is_named)
        self.assertTrue(datatype.is_predefined)

        # Decoding is expected to give back the very same object.
        decoded = datatype.decode()
        self.assertTrue(datatype is decoded)
def skipMPI(predicate, *conditions):
    """Return a ``unittest`` skip decorator driven by an MPI version predicate.

    ``predicate`` is a version predicate string; its name is either ``mpi``
    (compared against ``MPI.Get_version()``) or a vendor name (compared
    against ``MPI.get_vendor()``).  The test is skipped when the name
    matches, the version satisfies the predicate, and either no
    ``conditions`` were given or at least one of them is true.  Otherwise a
    decorator that never skips is returned.
    """
    from mpi4py import MPI

    def key(s):
        # Normalize a name: drop spaces, slashes and dashes, abbreviate
        # 'Microsoft' and lowercase the result.
        for old, new in ((' ', ''), ('/', ''), ('-', ''), ('Microsoft', 'MS')):
            s = s.replace(old, new)
        return s.lower()

    vp = VersionPredicate(key(predicate))
    if vp.name == 'mpi':
        name = 'mpi'
        # Pad the MPI standard version so it formats as three components.
        version = MPI.Get_version() + (0,)
    else:
        name, version = MPI.get_vendor()

    should_skip = (
        vp.name == key(name)
        and vp.satisfied_by('%d.%d.%d' % version)
        and (not conditions or any(conditions))
    )
    if should_skip:
        return unittest.skip(str(vp))
    return unittest.skipIf(False, '')
def get_task_comm():
    """Return a new ``MPI.Intracomm`` after reading the ``task_comm`` variable.

    The ``task_comm`` environment variable is read and parsed as an integer,
    and progress markers are printed along the way.  The assignment of that
    integer to the new communicator's handle is currently commented out, so
    the returned communicator is left as constructed.
    """
    import os
    import sys

    def trace(marker):
        # Print a progress marker and flush so it shows up immediately.
        print(marker)
        sys.stdout.flush()

    trace("gtc")
    from mpi4py import MPI
    trace("gtc2")
    import ctypes

    task_comm_string = os.getenv("task_comm")
    print("task_comm_string: " + task_comm_string)
    task_comm_int = int(task_comm_string)

    MPI_Comm = ctypes.c_int
    MPI_Comm.from_address(task_comm_int)
    newcomm = MPI.Intracomm()
    comm_val = MPI_Comm.from_address(MPI._addressof(newcomm))
    # comm_val.value = task_comm_int
    trace("gtc3")
    # newcomm.barrier()
    trace("gtc4")
    return newcomm
def get_intro_string(self):
    """Return the string to append to the end of the relax introduction string.

    @return:    The string describing this Processor fabric.
    @rtype:     str
    """

    # The MPI standard version and the vendor information.
    mpi_version = MPI.Get_version()
    vendor = MPI.get_vendor()
    vendor_name = vendor[0]
    version_parts = vendor[1]

    # Join the vendor version components with dots.
    vendor_version = str(version_parts[0]) + ''.join(
        '.%i' % version_parts[index]
        for index in range(1, len(version_parts))
    )

    # Assemble and return the description.
    template = "MPI %s.%s running via mpi4py with %i slave processors & 1 master. Using %s %s."
    return template % (mpi_version[0], mpi_version[1], self.processor_size(), vendor_name, vendor_version)
def testPreallocate(self):
    """Check the file size reported after a sequence of ``Preallocate`` calls.

    Preallocating a smaller size than the current one (10 after 100) is
    expected to leave the size unchanged.
    """
    # (requested size, expected size afterwards), applied in order.
    steps = [(1, 1), (100, 100), (10, 100), (200, 200)]

    ## MPICH2 1.0.x emits a nesting level warning
    ## when preallocating zero size.
    vendor_name, vendor_version = MPI.get_vendor()
    is_old_mpich2 = vendor_name == 'MPICH2' and vendor_version < (1, 1, 0)
    if not is_old_mpich2:
        steps.insert(0, (0, 0))

    for requested, expected in steps:
        self.FILE.Preallocate(requested)
        self.assertEqual(self.FILE.Get_size(), expected)
def get_watermark():
    """
    Return information about the current system relevant for pyasdf.

    The result is a dictionary with Python, platform, date/time, HDF5 and MPI
    details, plus a ``"module_versions"`` sub-dictionary with one entry per
    name in ``modules``.
    """
    vendor = MPI.get_vendor() if MPI else None
    if vendor:
        mpi_vendor = vendor[0]
        mpi_vendor_version = ".".join(map(str, vendor[1]))
    else:
        mpi_vendor = None
        mpi_vendor_version = None

    # h5py counts as parallel only if its config has a truthy "mpi" attribute.
    h5py_config = h5py.get_config()
    is_parallel = bool(hasattr(h5py_config, "mpi") and h5py_config.mpi)

    watermark = {
        "python_implementation": platform.python_implementation(),
        "python_version": platform.python_version(),
        "python_compiler": platform.python_compiler(),
        "platform_system": platform.system(),
        "platform_release": platform.release(),
        "platform_version": platform.version(),
        "platform_machine": platform.machine(),
        "platform_processor": platform.processor(),
        "platform_processor_count": cpu_count(),
        "platform_architecture": platform.architecture()[0],
        "platform_hostname": gethostname(),
        "date": strftime('%d/%m/%Y'),
        "time": strftime('%H:%M:%S'),
        "timezone": strftime('%Z'),
        "hdf5_version": h5py.version.hdf5_version,
        "parallel_h5py": is_parallel,
        "mpi_vendor": mpi_vendor,
        "mpi_vendor_version": mpi_vendor_version,
        "problematic_multiprocessing": is_multiprocessing_problematic(),
    }

    module_versions = {}
    for module in modules:
        module_versions[module] = get_distribution(module).version
    watermark["module_versions"] = module_versions

    if MPI is None:
        watermark["module_versions"]["mpi4py"] = None

    return watermark
def say_hello(comm):
    r"""Given a communicator, have each process in the communicator call
    say_hello().

    This Python function will be executed by each MPI process. Each will
    create its own copy of the data array `a` and send that array to the C
    function `say_hello()`.

    Returns
    -------
    a : array or None
        The array after the C call on rank 0, ``None`` on every other rank.
    """
    comm_ptr = MPI._addressof(comm)
    comm_val = MPI_Comm.from_address(comm_ptr)

    N = 8
    a = numpy.ascontiguousarray(numpy.zeros(N, dtype=numpy.double))
    libhello.say_hello(a.ctypes.data, N, comm_val)

    # Single parenthesized argument: prints the same on Python 2 and 3.
    print('%d ---- test: %s' % (comm.rank, a))

    if comm.rank == 0:
        return a
    return None
def go(comm_int):
    """Wrap the raw communicator value ``comm_int`` and run a barrier on it.

    A barrier is first run on ``MPI.COMM_WORLD``.  Then ``comm_int`` is
    written into the handle of a fresh ``MPI.Intracomm`` and a barrier is
    run on that communicator.
    """
    print("go(%i) ..." % comm_int)
    world = MPI.COMM_WORLD
    print("size: %i" % world.Get_size())
    world.barrier()

    # The handle is viewed as a pointer-sized value ("OpenMPI mode", from
    # Zaki).  The "MPICH mode" alternative noted in the original viewed the
    # same address as ctypes.c_int instead.
    wrapped_comm = MPI.Intracomm()
    raw_handle = ctypes.c_void_p.from_address(MPI._addressof(wrapped_comm))
    raw_handle.value = comm_int
    wrapped_comm.barrier()
def is_mpd_running():
    """Report whether an ``mpd`` daemon appears to be running.

    The check is only made for the ``"MPICH2"`` vendor, and is skipped when
    ``AMUSE_MPD_CHECK`` is set to anything but ``"1"`` or when ``PMI_PORT``
    or ``HYDRA_CONTROL_FD`` is present in the environment.  ``mpdtrace``
    exiting with code 255 is taken to mean that no daemon is running; in
    every other case, including ``mpdtrace`` failing to start, ``True`` is
    returned.
    """
    vendor_name, _ = MPI.get_vendor()
    if vendor_name != "MPICH2":
        return True

    environ = os.environ
    must_check_mpd = environ.get("AMUSE_MPD_CHECK", "1") == "1"
    if "PMI_PORT" in environ or "HYDRA_CONTROL_FD" in environ:
        must_check_mpd = False
    if not must_check_mpd:
        return True

    try:
        tracer = subprocess.Popen(
            ["mpdtrace"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        tracer.communicate()
        return tracer.returncode != 255
    except OSError:
        return True
def recv():
    """Receive a data package from ``source`` and post the matching receive.

    The package is received with ``comm.recv`` (tag 52); the payload is then
    requested with ``comm.Irecv`` (tag 57):

    * ``memory_type == 'devptr'``: memory is obtained from
      ``malloc_with_swap_out`` and wrapped with ``MPI.make_buffer``;
    * otherwise, if ``data_dtype`` is ``numpy.ndarray``, a new numpy array
      with the announced shape and dtype is the receive target.

    When ``VIVALDI_BLOCKING`` is set the request is waited on before
    returning.

    Returns:
        ``(devptr, data_package, request, buf)`` for device data,
        ``(data, data_package, request, None)`` for numpy data, and
        ``(None, None, None, None)`` otherwise.
    """
    # DEBUG flag
    ################################################
    RECV_CHECK = False

    # Implementation
    ################################################
    data_package = comm.recv(source=source, tag=52)
    dp = data_package
    memory_type = dp.memory_type

    if memory_type == 'devptr':
        nbytes = dp.data_bytes
        devptr, _usage = malloc_with_swap_out(nbytes)
        buf = MPI.make_buffer(devptr.__int__(), nbytes)
        request = comm.Irecv([buf, MPI.BYTE], source=source, tag=57)
        if VIVALDI_BLOCKING:
            MPI.Request.Wait(request)
        return devptr, data_package, request, buf

    if dp.data_dtype == numpy.ndarray:
        data_memory_shape = dp.data_memory_shape
        dtype = dp.data_contents_memory_dtype
        data = numpy.empty(data_memory_shape, dtype=dtype)
        request = comm.Irecv(data, source=source, tag=57)
        if RECV_CHECK:  # recv check
            MPI.Request.Wait(request)
            # Single formatted argument: prints the same on Python 2 and 3.
            print("RECV CHECK %s" % (data,))
        if VIVALDI_BLOCKING:
            MPI.Request.Wait(request)
        return data, data_package, request, None

    return None, None, None, None
def heat_parallel(uk, dx, Nx, dt, num_steps, comm):
    r"""Solve the heat equation in parallel.

    This Python function is executed by each spawned process.

    Parameters
    ----------
    uk : array
        Function values for process k.

    Returns
    -------
    uk : array
        The updated function values after heat_parallel()

    Raises
    ------
    ValueError
        If ``len(uk)`` differs from ``Nx``.
    AttributeError
        If an ``AttributeError`` occurs while setting up or calling the C
        library function.
    """
    if len(uk) != Nx:
        raise ValueError("Nx should equal the number of grid points.")

    # Work on a contiguous double-precision copy; the input is not modified.
    as_double = numpy.array(uk, dtype=numpy.double)
    uk = numpy.ascontiguousarray(as_double).astype(numpy.double)

    # View the communicator's handle as the C communicator type.
    comm_val = c_mpi_comm.from_address(MPI._addressof(comm))

    # Declare the C signature and evaluate.
    try:
        c_heat = homework4library.heat_parallel
        c_heat.restype = None
        c_heat.argtypes = [c_void_p, c_double, c_size_t,
                           c_double, c_size_t, c_mpi_comm]
        c_heat(uk.ctypes.data, dx, Nx, dt, num_steps, comm_val)
    except AttributeError:
        raise AttributeError("Something wrong happened when calling the C "
                             "library function.")
    return uk
def cal_signal_noise_ratio():
    """Compute the signal-to-noise ratio per multipole and write it to disk.

    The ``num_l`` multipoles are split into contiguous chunks, one per MPI
    rank.  Every rank reads its rows of C^ij(l) from a shared binary file,
    adds shape noise to the auto-power terms, builds and inverts the
    covariance matrix for each multipole, and saves ``(ell, S/N)`` pairs to
    its own text file.  Rank 0 reads the shape-noise table and broadcasts
    it, and prints the total running time at the end.

    Relies on module-level names: ``num_rbin``, ``idir0``, ``odir0``,
    ``Pk_type``, ``Psm_type``, ``snf`` and ``cal_cov_matrix``.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    comm.Barrier()
    t_start = MPI.Wtime()

    N_dset = (num_rbin+1)*num_rbin//2
    num_kin = 505
    l_min = 1
    l_max = 2002
    delta_l = 3
    num_l = (l_max - l_min)//delta_l + 1
    f_sky = 15000.0/41253.0  # Survey area 15000 deg^2 is from TF-Stage IV
    data_type_size = 8  # bytes per float64 value in the binary file

    prefix = 'Tully-Fisher_'
    idir = idir0 + 'mpi_preliminary_data_{}/'.format(Pk_type)
    #------------- !! write output files, they are the basic files --------------#
    ofdir = odir0 + 'mpi_{}sn_exp_k_data_{}/comm_size{}/signal_noise_ratio/'.format(prefix, Pk_type, size)
    ifprefix = idir + prefix
    if Psm_type == 'Pnow':
        Gm_ifprefix = idir0 + 'mpi_preliminary_data_Pwig_nonlinear/set_Pnorm_Pnow/' + prefix
        ofdir = ofdir + 'set_Pnorm_Pnow/'
    else:
        Gm_ifprefix = idir0 + 'mpi_preliminary_data_Pwig_nonlinear/' + prefix
    ofprefix = ofdir + prefix

    if rank == 0:
        if not os.path.exists(ofdir):
            os.makedirs(ofdir)
        # read the default shape noise \sigma^2/n^i
        inputf = Gm_ifprefix + 'pseudo_shapenoise_{0}rbins.out'.format(num_rbin)
        print(inputf)
        pseudo_sn = np.loadtxt(inputf, dtype='f8', comments='#')
        pseudo_sn = pseudo_sn * snf  # times a shape noise scale factor
    else:
        pseudo_sn = np.zeros(num_rbin)
    comm.Bcast(pseudo_sn, root=0)

    # Rounding errors here should not be a problem unless default size is very small
    default_num_l_in_rank = int(np.ceil(num_l / size))
    end_num_l_in_rank = num_l - (default_num_l_in_rank * (size - 1))
    assert end_num_l_in_rank >= 1, "Assign fewer number of processes."

    # The last rank takes whatever is left after the equal-sized chunks.
    if rank == (size - 1):
        num_l_in_rank = end_num_l_in_rank
    else:
        num_l_in_rank = default_num_l_in_rank

    # be careful here we have extended photometric redshift bins, which is
    # different from TF case.
    Cijl_len = num_l_in_rank * N_dset
    Cijl_sets = np.zeros(Cijl_len)

    # default case with delta_l = 3; Cij_l stores Cij for each ell by row
    file_Cijl_cross = ifprefix + 'Cij_l_{0}rbins_{1}kbins_CAMB.bin'.format(num_rbin, num_kin)
    Cijl_freader = MPI.File.Open(comm, file_Cijl_cross)  # Open and read a binary file
    # Byte offset of this rank's first row.  Every preceding rank owns
    # default_num_l_in_rank rows, so the offset must be based on that chunk
    # size -- not on this rank's own (possibly shorter) Cijl_len, which put
    # the last rank at the wrong position when num_l is not divisible by size.
    Cijl_fh_start = rank * default_num_l_in_rank * N_dset * data_type_size
    Cijl_freader.Seek(Cijl_fh_start)
    Cijl_freader.Read([Cijl_sets, MPI.DOUBLE])  # Read using individual file pointer
    comm.Barrier()
    Cijl_freader.Close()

    def SNR_fun(l, rank):
        """Return ``(ell, S/N)`` for the ``l``-th multipole owned by ``rank``."""
        n_l = default_num_l_in_rank * rank + l
        ell = l_min + n_l * delta_l
        cijl_true = Cijl_sets[l*N_dset: (l+1)*N_dset]
        # Distinguish the observed C^ij(l) (denoted by cijl_sn) from the true C^ij(l)
        cijl_sn = np.array(cijl_true)
        # Add shape noise on C^ii terms to get cijl_sn
        cijl_sn[sn_id] = cijl_true[sn_id] + pseudo_sn
        # Get an upper-triangle matrix for Cov(C^ij, C^pq) from Fortran subroutine wrapped.
        Cov_cij_cpq = cal_cov_matrix(num_rbin, iu1, cijl_sn)
        # It's symmetric. Construct the whole matrix for inversion.
        Cov_cij_cpq = Cov_cij_cpq.T + np.triu(Cov_cij_cpq, k=1)
        inv_Cov_cij_cpq = linalg.inv(Cov_cij_cpq)
        # Take account of the number of modes (the denominator)
        inv_Cov_cij_cpq = inv_Cov_cij_cpq * ((2.0*ell+1.0)*delta_l*f_sky)
        # (S/N)^2 = c^T . Cov^-1 . c
        SN_square_per_ell = np.dot(np.dot(cijl_true, inv_Cov_cij_cpq), cijl_true)
        SN_per_ell = SN_square_per_ell**0.5
        if rank == size-1:
            print('ell from rank', rank, 'is', ell, '\n')
        return ell, SN_per_ell

    #-------- Output signal-to-noise ratio from each ell -------##
    SN_ofile = ofprefix + 'SNR_per_ell_{}rbins_{}kbins_snf{}_rank{}.dat'.format(num_rbin, num_kin, snf, rank)
    iu1 = np.triu_indices(num_rbin)
    # The ID of dset C^ij(l) added by the shape noise when i=j (auto power components)
    sn_id = [int((2*num_rbin+1-ii)*ii/2) for ii in range(num_rbin)]

    SN_rows = [SNR_fun(l, rank) for l in range(num_l_in_rank)]
    SN_data = np.array(SN_rows, dtype=np.float64).reshape(-1, 2)

    header_line = " ell S/N"
    np.savetxt(SN_ofile, SN_data, fmt='%.7e', delimiter=' ', newline='\n', header=header_line, comments='#')
    comm.Barrier()
    t_end = MPI.Wtime()
    if rank == 0:
        print('With total processes', size, ', the running time:', t_end-t_start)
if ioproc is not None: self.assertTrue(ioproc in vals) def testAppNum(self): if MPI.APPNUM == MPI.KEYVAL_INVALID: return appnum = MPI.COMM_WORLD.Get_attr(MPI.APPNUM) if appnum is not None: self.assertTrue(appnum == MPI.UNDEFINED or appnum >= 0) def testUniverseSize(self): if MPI.UNIVERSE_SIZE == MPI.KEYVAL_INVALID: return univsz = MPI.COMM_WORLD.Get_attr(MPI.UNIVERSE_SIZE) if univsz is not None: self.assertTrue(univsz == MPI.UNDEFINED or univsz >= 0) def testLastUsedCode(self): if MPI.LASTUSEDCODE == MPI.KEYVAL_INVALID: return lastuc = MPI.COMM_WORLD.Get_attr(MPI.LASTUSEDCODE) self.assertTrue(lastuc >= 0) _name, _version = MPI.get_vendor() if (_name == 'MPICH2' and _version > (1, 2)): # Up to mpich2-1.3.1 when running under Hydra process manager, # getting the universe size fails for the singleton init case if MPI.COMM_WORLD.Get_attr(MPI.APPNUM) is None: del TestWorldAttrs.testUniverseSize if __name__ == '__main__': unittest.main()
import os
import random
import pickle

import mpi4py
mpi4py.rc.initialize = False
from mpi4py import MPI
MPI.Init()

from ._globals import INDIVIDUAL_TAG, LOSS_REPORT_TAG, INIT_TAG, POPULATION_TAG


# TODO top n results instead of top 1, for neural network ensembles
class Propulator():
    """Hold the loss function, propagator, communicator and populations."""

    def __init__(self, loss_fn, propagator, comm=None, generations=0, checkpoint_file=None):
        """Store the configuration and start with empty populations.

        ``generations`` is converted with ``int``; a value below -1 raises
        ``ValueError``.  ``comm`` defaults to ``MPI.COMM_WORLD``.
        ``checkpoint_file`` is accepted but not used here.
        """
        self.loss_fn = loss_fn
        self.propagator = propagator

        generation_count = int(generations)
        self.generations = generation_count
        if generation_count < -1:
            raise ValueError(
                "Invalid number of generations, needs to be larger than -1, but was {}"
                .format(generation_count))

        self.comm = MPI.COMM_WORLD if comm is None else comm

        self.population = []
        self.retired = []
# Worker 0 collects the data, picks the initial centroids and splits the data
# into one chunk per worker; every other worker starts with placeholders.
if worker== 0:
    data=collect_data()
    ini_cent=initial_centroid(data)
    chunks=np.array_split(data,num_workers)
    centers_global=[0 for x in range(k)]
    new_c=[0 for i in range (num_workers)]
else:
    data=None
    chunks=None
    ini_cent=None
    centers_global=None
    new_c=None

# Hand one chunk to each worker and share the remaining values from worker 0.
vA=comm.scatter(chunks,root=0)
vB=comm.bcast(ini_cent,root=0)
vC=comm.bcast(new_c,root=0)
vD=comm.bcast(centers_global,root=0)
#data=comm.bcast(data,root=0)
#print(worker,vA)

# Time only the local k-means call on this worker's chunk.
start = MPI.Wtime()
new=k_mean(vA,vB)
end = MPI.Wtime()
#print(new[1])
# NOTE(review): the runtime print is assumed to belong to the worker-0
# branch -- confirm against the original indentation.
if worker==0:
    a=global_centroid(new)
    print("Runtime", end-start);
def __call__(self, optProb, sens=None, sensStep=None, sensMode=None, storeHistory=None, hotStart=None, storeSens=True):
    """
    This is the main routine used to solve the optimization
    problem.

    Parameters
    ----------
    optProb : Optimization or Solution class instance
        This is the complete description of the optimization problem
        to be solved by the optimizer

    sens : str or python Function.
        Specify the method used to compute sensitivities. To
        explicitly use the pyOptSparse gradient class to do the
        derivatives with finite differences use \'FD\'. \'sens\'
        may also be \'CS\' which will cause pyOptSparse to compute
        the derivatives using the complex step method. Finally,
        \'sens\' may be a python function handle which is expected
        to compute the sensitivities directly. For expensive
        function evaluations and/or problems with large numbers of
        design variables this is the preferred method.

    sensStep : float
        Set the step size to use for design variables. Defaults to
        1e-6 when sens is \'FD\' and 1e-40j when sens is \'CS\'.

    sensMode : str
        Use \'pgc\' for parallel gradient computations. Only
        available with mpi4py and each objective evaluation is
        otherwise serial

    storeHistory : str
        File name of the history file into which the history of
        this optimization will be stored

    hotStart : str
        File name of the history file to "replay" for the
        optimization.  The optimization problem used to generate
        the history file specified in \'hotStart\' must be
        **IDENTICAL** to the currently supplied \'optProb\'. By
        identical we mean, **EVERY SINGLE PARAMETER MUST BE
        IDENTICAL**. As soon as the requested evaluation point
        from ParOpt does not match the history, function and
        gradient evaluations revert back to normal evaluations.

    storeSens : bool
        Flag specifying if sensitivities are to be stored in hist.
        This is necessary for hot-starting only.

    Returns
    -------
    sol
        The value returned by ``self._communicateSolution``; on the root
        process it is given the solution built from the optimizer's result,
        on every other process it is given ``None``.
    """
    self.startTime = time.time()
    self.callCounter = 0
    self.storeSens = storeSens

    if len(optProb.constraints) == 0:
        # If the problem is unconstrained, add a dummy constraint.
        self.unconstrained = True
        optProb.dummyConstraint = True

    # Save the optimization problem and finalize constraint
    # Jacobian, in general can only do on root proc
    self.optProb = optProb
    self.optProb.finalize()
    # Set history/hotstart
    self._setHistory(storeHistory, hotStart)
    self._setInitialCacheValues()
    self._setSens(sens, sensStep, sensMode)
    blx, bux, xs = self._assembleContinuousVariables()
    # Clip the starting point into the variable bounds.
    xs = np.maximum(xs, blx)
    xs = np.minimum(xs, bux)

    # The number of design variables
    n = len(xs)

    oneSided = True

    if self.unconstrained:
        m = 0
    else:
        indices, blc, buc, fact = self.optProb.getOrdering(
            ["ne", "le", "ni", "li"], oneSided=oneSided)
        m = len(indices)
        self.optProb.jacIndices = indices
        self.optProb.fact = fact
        self.optProb.offset = buc

    if self.optProb.comm.rank == 0:

        # Adapter exposing this optimizer's problem through the
        # _ParOpt.Problem callbacks; created on the root process only.
        class Problem(_ParOpt.Problem):
            def __init__(self, ptr, n, m, xs, blx, bux):
                super().__init__(MPI.COMM_SELF, n, m)
                self.ptr = ptr  # the enclosing optimizer instance
                self.n = n
                self.m = m
                self.xs = xs
                self.blx = blx
                self.bux = bux
                self.fobj = 0.0  # last objective value seen by evalObjCon
                return

            def getVarsAndBounds(self, x, lb, ub):
                """Get the variable values and bounds"""
                # Find the average distance between lower and upper bound
                # (an unbounded variable counts as a distance of 1.0)
                bound_sum = 0.0
                for i in range(len(x)):
                    if self.blx[i] <= -INFINITY or self.bux[i] >= INFINITY:
                        bound_sum += 1.0
                    else:
                        bound_sum += self.bux[i] - self.blx[i]
                bound_sum = bound_sum / len(x)

                for i in range(len(x)):
                    x[i] = self.xs[i]
                    lb[i] = self.blx[i]
                    ub[i] = self.bux[i]
                    # A start value sitting on a bound is moved inside by
                    # half of the smaller of the average and actual range.
                    if self.xs[i] <= self.blx[i]:
                        x[i] = self.blx[i] + 0.5 * np.min(
                            (bound_sum, self.bux[i] - self.blx[i]))
                    elif self.xs[i] >= self.bux[i]:
                        x[i] = self.bux[i] - 0.5 * np.min(
                            (bound_sum, self.bux[i] - self.blx[i]))

                return

            def evalObjCon(self, x):
                """Evaluate the objective and constraint values"""
                fobj, fcon, fail = self.ptr._masterFunc(
                    x[:], ["fobj", "fcon"])
                self.fobj = fobj
                # Constraints are negated for ParOpt's sign convention.
                return fail, fobj, -fcon

            def evalObjConGradient(self, x, g, A):
                """Evaluate the objective and constraint gradients"""
                gobj, gcon, fail = self.ptr._masterFunc(
                    x[:], ["gobj", "gcon"])
                g[:] = gobj[:]
                # Constraint gradients are negated like the constraints.
                for i in range(self.m):
                    A[i][:] = -gcon[i][:]
                return fail

        optTime = MPI.Wtime()

        # Optimize the problem
        problem = Problem(self, n, m, xs, blx, bux)
        optimizer = _ParOpt.Optimizer(problem, self.set_options)
        optimizer.optimize()
        x, z, zw, zl, zu = optimizer.getOptimizedPoint()

        # Set the total opt time
        optTime = MPI.Wtime() - optTime

        # Get the objective function value
        fobj = problem.fobj

        if self.storeHistory:
            self.metadata["endTime"] = datetime.datetime.now().strftime(
                "%Y-%m-%d %H:%M:%S")
            self.metadata["optTime"] = optTime
            self.hist.writeData("metadata", self.metadata)
            self.hist.close()

        # Create the optimization solution. Note that the signs on the multipliers
        # are switched since ParOpt uses a formulation with c(x) >= 0, while pyOpt
        # uses g(x) = -c(x) <= 0. Therefore the multipliers are reversed.
        sol_inform = {}

        # If the number of constraints is zero, ParOpt returns z as None.
        # Thus if there are no constraints, pass an empty list
        # to multipliers instead of z.
        if z is not None:
            sol = self._createSolution(optTime, sol_inform, fobj, x[:], multipliers=-z)
        else:
            sol = self._createSolution(optTime, sol_inform, fobj, x[:], multipliers=[])

        # Indicate solution finished
        self.optProb.comm.bcast(-1, root=0)
    else:
        # We are not on the root process so go into waiting loop:
        self._waitLoop()
        sol = None

    # Communicate the solution and return
    sol = self._communicateSolution(sol)

    return sol
def main(files, dark_file, center, inner_radii, outer_radii, output_file,
         generate_sparse):
    """Electron-count a set of files across MPI ranks and save the result.

    Every rank counts electrons in its share of ``files`` (as selected by
    ``get_files``), the per-rank frame events are reduced to rank 0, and
    rank 0 writes them to ``output_file``.  When ``generate_sparse`` is
    truthy rank 0 also renders one PNG per STEM image.

    :param files: input data files; there must be at least one per rank.
    :param dark_file: optional dark-field reference file, or ``None`` to use
        an all-zero 576x576 reference.
    :param center: ``"center_x,center_y"`` string of two integers.
    :param inner_radii: comma-separated integer inner radii.
    :param outer_radii: comma-separated integer outer radii (same count as
        ``inner_radii``).
    :param output_file: path handed to ``io.save_electron_counts``.
    :param generate_sparse: whether to write ``sparse_stem_image_<i>.png``.
    :raises click.ClickException: on malformed ``center`` / radii arguments.
    """
    center = center.split(',')
    if len(center) != 2:
        msg = 'Center must be of the form: center_x,center_y.'
        raise click.ClickException(msg)

    inner_radii = inner_radii.split(',')
    outer_radii = outer_radii.split(',')
    if len(inner_radii) != len(outer_radii):
        msg = 'Number of inner and outer radii must match'
        raise click.ClickException(msg)

    # Report non-numeric values the same way as the shape checks above
    # rather than letting a bare ValueError traceback reach the user.
    try:
        center = tuple(int(x) for x in center)
        inner_radii = [int(x) for x in inner_radii]
        outer_radii = [int(x) for x in outer_radii]
    except ValueError as err:
        raise click.ClickException(str(err))

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    world_size = comm.Get_size()

    if world_size > len(files):
        # Every rank leaves; only rank 0 explains why.
        if rank == 0:
            print('Error: number of MPI processes,', world_size, ', exceeds',
                  'the number of files:', len(files))
        return

    comm.Barrier()
    start = MPI.Wtime()

    if dark_file is not None:
        # Every process will do the dark field reference average for now
        reader = io.reader(dark_file, version=io.FileVersion.VERSION3)
        dark = image.calculate_average(reader)
    else:
        dark = np.zeros((576, 576))

    # Split up the files among processes
    files = get_files(files)

    # Create local electron count
    reader = io.reader(files, version=io.FileVersion.VERSION3)
    electron_counted_data = image.electron_count(reader, dark, verbose=True)
    local_frame_events = electron_counted_data.data

    # Now reduce to root
    global_frame_events = reduce_to_root_method1(local_frame_events)
    # global_frame_events = reduce_to_root_method2(local_frame_events)

    comm.Barrier()
    end = MPI.Wtime()

    if rank != 0:
        return

    print('time: %s' % (end - start))

    # Create new electron counted data with the global frame events.
    # NOTE(review): the namedtuple *class* itself is used as a mutable
    # attribute bag here (it is never instantiated); kept as-is because the
    # consumers below only read these three attributes.
    data = namedtuple('ElectronCountedData',
                      ['data', 'scan_dimensions', 'frame_dimensions'])
    data.data = global_frame_events
    data.scan_dimensions = electron_counted_data.scan_dimensions
    data.frame_dimensions = electron_counted_data.frame_dimensions

    # Write out the HDF5 file
    io.save_electron_counts(output_file, data)

    if generate_sparse:
        # Save out the sparse image
        stem_imgs = image.create_stem_images(data, inner_radii, outer_radii,
                                             center=center)
        for i, img in enumerate(stem_imgs):
            fig, ax = plt.subplots(figsize=(12, 12))
            ax.matshow(img)
            name = 'sparse_stem_image_' + str(i) + '.png'
            fig.savefig(name, dpi=300)
            # Release the figure; pyplot otherwise keeps every one alive.
            plt.close(fig)
def proc_write(twx_cfg, ncdf_mode, start_ymd, end_ymd, nwrkers):
    """Writer process: receive infill results over MPI and store them.

    Opens (``ncdf_mode == 'r+'``) or creates the tmin/tmax output netCDF
    files, then loops receiving result tuples from any rank and writing each
    one into the matching dataset until ``nwrkers`` stop messages arrived.

    :param twx_cfg: configuration object providing the ``fpath_stndata_nc_*``
        paths read below.
    :param ncdf_mode: ``'r+'`` to append to existing output files; any other
        value creates fresh ones.
    :param start_ymd: start of the period passed to ``StationDataDb``.
    :param end_ymd: end of the period passed to ``StationDataDb``.
    :param nwrkers: number of ``TAG_STOPWORK`` messages to wait for.
    :return: ``0`` once every worker has reported it is done.
    """
    status = MPI.Status()

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))
    days = stn_da.days
    nwrkrs_done = 0

    # The coordinator broadcasts the station ids to infill for each variable.
    bcast_msg = None
    bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD)
    stnids_tmin, stnids_tmax = bcast_msg
    print "WRITER: Received broadcast msg"

    if ncdf_mode == 'r+':

        ds_tmin = Dataset(twx_cfg.fpath_stndata_nc_infill_tmin, 'r+')
        ds_tmax = Dataset(twx_cfg.fpath_stndata_nc_infill_tmax, 'r+')
        # Count the broadcast ids *before* they are replaced by the ids
        # stored in the existing files (used below for index lookups).
        ttl_infills = stnids_tmin.size + stnids_tmax.size
        stnids_tmin = ds_tmin.variables[STN_ID][:].astype(np.str)
        stnids_tmax = ds_tmax.variables[STN_ID][:].astype(np.str)

    else:

        # Fresh output: build one file per variable holding only the
        # stations that are going to be infilled.
        stns_tmin = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmin,
                                        assume_unique=True)]
        variables_tmin = [
            ('tmin', 'f4', netCDF4.default_fillvals['f4'],
             'minimum air temperature', 'C'),
            ('flag_infilled', 'i1', netCDF4.default_fillvals['i1'],
             'infilled flag', ''),
            ('tmin_infilled', 'f4', netCDF4.default_fillvals['f4'],
             'infilled minimum air temperature', 'C')
        ]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmin, stns_tmin, days,
                        variables_tmin)
        stnda_out_tmin = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmin,
                                       mode="r+")
        stnda_out_tmin.add_stn_variable('mae', 'mean absolute error', 'C',
                                        "f8")
        stnda_out_tmin.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmin = stnda_out_tmin.ds

        stns_tmax = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmax,
                                        assume_unique=True)]
        variables_tmax = [
            ('tmax', 'f4', netCDF4.default_fillvals['f4'],
             'maximum air temperature', 'C'),
            ('flag_infilled', 'i1', netCDF4.default_fillvals['i1'],
             'infilled flag', ''),
            ('tmax_infilled', 'f4', netCDF4.default_fillvals['f4'],
             'infilled maximum air temperature', 'C')
        ]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmax, stns_tmax, days,
                        variables_tmax)
        stnda_out_tmax = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmax,
                                       mode="r+")
        stnda_out_tmax.add_stn_variable('mae', 'mean absolute error', 'C',
                                        "f8")
        stnda_out_tmax.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmax = stnda_out_tmax.ds

        ttl_infills = stnids_tmin.size + stnids_tmax.size

    print "WRITER: Infilling a total of %d station time series " % (
        ttl_infills, )
    print "WRITER: Output NCDF files ready"

    stat_chk = StatusCheck(ttl_infills, 10)

    while 1:

        # Results and stop notices share the 7-tuple layout; the MPI tag
        # tells them apart.
        result = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                                     status=status)
        stn_id, tair_var, tair, fill_mask, tair_infill, mae, bias = result

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "Writer: Finished"
                return 0

        else:

            # Pick the dataset and the station's column for this variable.
            if tair_var == 'tmin':
                stn_idx = np.nonzero(stnids_tmin == stn_id)[0][0]
                ds = ds_tmin
            else:
                stn_idx = np.nonzero(stnids_tmax == stn_id)[0][0]
                ds = ds_tmax

            ds.variables[tair_var][:, stn_idx] = tair
            ds.variables["".join([tair_var, "_infilled"])][:,
                                                           stn_idx] = tair_infill
            ds.variables['flag_infilled'][:, stn_idx] = fill_mask
            ds.variables['bias'][stn_idx] = bias
            # NOTE(review): presumably LAST_VAR_WRITTEN names the 'mae'
            # variable and is written last on purpose -- confirm where the
            # constant is defined.
            ds.variables[LAST_VAR_WRITTEN][stn_idx] = mae
            ds.sync()

            print "|".join([
                "WRITER", stn_id, tair_var,
                "%.4f" % (mae, ),
                "%.4f" % (bias, )
            ])

            stat_chk.increment()
""" if "null" in in_line: in_line = in_line.replace("null", "\"NA\"") # replace nulls with NA strings in_line = in_line.split("\t") # split lines by any tabs for i in range(len(in_line)): in_line[i] = ast.literal_eval( in_line[i] ) # convert string representation of object to that actual object return flatten_list(in_line) if __name__ == '__main__': comm = MPI.COMM_WORLD rank, size = comm.Get_rank(), comm.Get_size() name = MPI.Get_processor_name() args = iter(sys.argv) next(args) file_name = next(args) # get name of file to process from script argument out_file_name = file_name.split( ".")[0] + ".csv" # create output CSV filename if rank == 0: # some status messages print("Converting " + file_name) print("Starting node " + str(rank + 1) + " of " + str(size) + " on " + str(name)) out_lines = [] num_lines = 0 start = time.time() # start the timer
A = A.toarray() b = rand(SIZE) else : A = None b = None #########Send b to all procs and scatter A (each proc has its own local matrix##### #LocalMatrix = # Scatter the matrix A #####################Compute A*b locally####################################### #LocalX = start = MPI.Wtime() matrixVectorMult(LocalMatrix, b, LocalX) stop = MPI.Wtime() if RANK == 0: print("CPU time of parallel multiplication is ", (stop - start)*1000) ##################Gather te results ########################################### # sendcouns = local size of result #sendcounts = # if RANK == 0: # X = ... # else : # X = .. # Gather the result into X
stdout.flush() comm.barrier() if rank == 0: print("") print("Status with ANY_SOURCE:") # Alternative version that finds out the sender after communication if rank > 0: req_recv = comm.Irecv(buff, source=MPI.ANY_SOURCE, tag=rank) if rank < size - 1: req_send = comm.Isend(data, dest=tgt, tag=tgt) req_send.Wait() print(" Rank %d: sent %d elements using tag '%d'." % \ (rank, len(data), tgt)) if rank > 0: info = MPI.Status() req_recv.Wait(info) print(" Rank %d: received a message from rank %d." % (rank, info.Get_source())) print(" Rank %d: received an array filled with %ds." % (rank, buff[0])) # ... wait for every rank to finish ... stdout.flush() comm.barrier() if rank == 0: print("") print("Simplified with PROC_NULL:") # Simplified version where all ranks can send send_tag = tgt if tgt >= size:
def testIsFinalized(self):
    """MPI must report, as an exact ``bool``, that it is not finalized yet."""
    finalized = MPI.Is_finalized()
    self.assertTrue(bool is type(finalized))
    self.assertFalse(finalized)
#!/usr/bin/env python
from mpi4py import MPI

# Demo: lay the world communicator out as a 2-D periodic Cartesian grid and
# have one rank print the source/destination neighbours along each axis.

comm = MPI.COMM_WORLD
world_rank = comm.Get_rank()
size = comm.Get_size()

# Let MPI choose a balanced factorisation of `size` over `ndim` axes.
ndim = 2
dims = MPI.Compute_dims(size, [0] * ndim)

cart_comm = comm.Create_cart(dims, periods=[True, True], reorder=True)
new_rank = cart_comm.Get_rank()

if new_rank == 0:
    print("Cart dim: %s" % (dims))

# Look one step backwards and one step forwards along every axis.
for i in range(ndim):
    for d in (-1, +1):
        source, dest = cart_comm.Shift(i, d)
        if new_rank != 0:
            continue
        print("Dir %d, disp %d - Src %d - Dest %d" % (i, d, source, dest))

cart_comm.Free()
# and returns the results as a [0,1] integer matrix to the 0th MPI process # make sure you use the same number of MPI processes as you did to calculate the matrices! full_matrix = np.zeros(len(df_test)).astype(int) my_matrix = np.any(my_matrix > sim, axis=0).astype(int) mpi_comm.Reduce([my_matrix, MPI.INT], [full_matrix, MPI.INT], op=MPI.SUM, root=0) # 0th MPI process uses the full integer matrix to do Tanimoto splitting and save the new splits if mpi_rank == 0: print('Percentage of test set with similarity > {}: {:.1f}%'.format( sim, 100 * np.count_nonzero(full_matrix) / len_df)) print('Original train/test ratio: {:.1f}%/{:.1f}%'.format( 100 * len(df_train) / len_df, 100 * len(df_test) / len_df)) full_matrix = full_matrix.astype( bool) # anything other than 0 is converted to False df_train_new = df_train.append( df_test.iloc[full_matrix] ) # test set reactions with an integer 1 get appended into train set df_test_new = df_test[ ~full_matrix] # only keep test set reactions with integer 0 print('Post-split train/test ratio: {:.1f}%/{:.1f}%'.format( 100 * len(df_train_new) / len_df, 100 * len(df_test_new) / len_df)) df_train_new.to_csv('train_split_new.txt', index=False) df_test_new.to_csv('test_split_new.txt', index=False) mpi_comm.Barrier() MPI.Finalize()
elif (count == 9): S.nsteps = c_int(int(value)) elif (count == 10): S.dt = c_double(float(value)) elif (count == 11): nprint = int(value) return restfile, trajfile, ergfile, nprint #**************************************************************************************** # MAIN # read input file system = mdsys_t() comm = MPI.COMM_WORLD comm_ptr = MPI._addressof(comm) comm_val = MPI_Comm.from_address(comm_ptr) system.mpicomm = comm_val system.mpirank = comm.Get_rank() system.nprocs = comm.Get_size() try: system.nthreads = int(os.environ['OMP_NUM_THREADS']) except KeyError: if system.mpirank == 0: print("ERROR: Please set the environment variable OMP_NUM_THREADS") sys.exit(1) if (system.mpirank == 0):
from mpi4py import MPI
import argparse
import time
from sys import exit

rank = MPI.COMM_WORLD.Get_rank()

if __name__ == "__main__":
    # Two optional integer knobs, both defaulting to 0: how long to idle and
    # which status code to exit with.
    parser = argparse.ArgumentParser()
    for flag in ('--sleep', '--retcode'):
        parser.add_argument(flag, type=int, default=0)
    args = parser.parse_args()

    host = MPI.Get_processor_name()
    print("Rank", rank, "on", host)

    delay = args.sleep
    if delay:
        time.sleep(delay)

    exit(args.retcode)
from mpi4py import MPI
import numpy as np

# setup communicator and get # procs
comm = MPI.COMM_WORLD
nprocs = comm.Get_size()

# We are solving the inhomogeneous 1D groundwater flow problem
#   0 = d/dx[k(x)dh/dx]
# from x = 0 to x = L, i.e. we are solving along a 1-dimensional grid.
# To simplify the parallelization, we can use the Cartesian convenience
# functions from MPI. For this example we could figure this out ourselves
# pretty easily, but when scaling up this is really convenient.

# get dimensions
dims = MPI.Compute_dims(nprocs, 1)

# setup grid
grid = comm.Create_cart(dims)

# now we want the gridded rank
rank = grid.Get_rank()

# The Jacobi algorithm needs the previous and next values.
# grid.Shift(direction (dim), displacement)
# So shift along the zeroth axis, 1 unit in both directions.
# Shift returns MPI.PROC_NULL (an implementation-defined value, not
# necessarily -1) for a neighbour that would fall off either end of the grid.
left, right = grid.Shift(0,1)

# grab the status to be used in sendrecv calls later
status = MPI.Status()

# setup problem parameters
h0 = 1.0 # GW head at x = 0
def execute():
    """Run the master/worker evaluation loop over MPI.

    Rank 0 acts as master: it waits for workers to report ready, then for
    every run and epoch dispatches each network in ``cn.Net.Ecosystem`` to a
    free worker, collects the evaluated networks, calibrates/evolves/culls
    the ecosystem, saves configuration and statistics, and finally tells the
    workers to exit.  Every other rank acts as a worker: it announces itself,
    evaluates the networks it is sent and returns them, until it receives an
    exit tag or an evaluation raises.

    Ranks above ``Conf.GPUCount`` return immediately when ``Conf.UseCuda``
    is set.
    """

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    status = MPI.Status()

    # With CUDA enabled, ranks beyond the GPU count take no part at all.
    if Conf.UseCuda and rank > Conf.GPUCount:
        return

    if rank == 0:
        # Master process

        try:
            # Set the default tensor type
            if Conf.UseCuda:
                torch.set_default_tensor_type(torch.cuda.FloatTensor)
            else:
                torch.set_default_tensor_type(torch.FloatTensor)

        except:
            print('Caught exception in init()')
            Conf.Tag = Tags.Exit
            dump_exception()

        if Conf.Evaluator is None:
            Conf.Evaluator = train

        assert Conf.DataLoader is not None, "Please assign a data loader function."

        # Wait for all workers to return a Ready signal
        while len(Conf.Workers) < size - 1:

            # Busy-wait (no sleep) until a message from any rank is pending.
            while not comm.iprobe(source=MPI.ANY_SOURCE):
                pass

            comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            source = status.Get_source()
            tag = status.Get_tag()

            if tag == Tags.Ready:
                print('Worker {} ready'.format(source))
                Conf.Workers.append(source)

            elif tag == Tags.Exit:
                # A worker bailed out during start-up: stop waiting.
                Conf.Tag = Tags.Exit
                break

        # List of free workers
        free_workers = []
        for worker in Conf.Workers:
            free_workers.append(worker)

        # Worker management subroutine
        def manage_workers(free_workers):
            """Block for one worker message and update the bookkeeping.

            A ``Tags.Done`` message stores the returned network and marks
            its sender free; a ``Tags.Exit`` message removes the sender from
            ``Conf.Workers`` and switches ``Conf.Tag`` to exit.
            """

            while not comm.iprobe(source=MPI.ANY_SOURCE):
                time.sleep(0.1)

            net = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            source = status.Get_source()
            tag = status.Get_tag()

            if tag == Tags.Done:
                cn.Net.Ecosystem[net.ID] = net
                free_workers.append(source)

            elif tag == Tags.Exit:
                Conf.Tag = Tags.Exit
                Conf.Workers.remove(source)

        # Run the task
        for run in range(1, Conf.Runs + 1):

            # No workers left means nothing can be evaluated.
            if len(Conf.Workers) == 0:
                break

            print(f'\n===============[ Run {run} ]===============')

            # Initialise the ecosystem
            try:
                print("\n======[ Initialising ecosystem ]======\n")
                init()

                if not Conf.UnitTestMode:
                    os.makedirs(Conf.LogDir, exist_ok = True)
                    Conf.Logger = SummaryWriter(Conf.LogDir + f'/run_{run}')

            except:
                print('Caught exception in init()')
                Conf.Tag = Tags.Exit
                dump_exception()
                break

            for epoch in range(1, Conf.Epochs + 1):

                if len(Conf.Workers) == 0:
                    break

                print(f'\n===============[ Epoch {epoch} ]===============')

                print("\n======[ Evaluating ecosystem ]======\n")

                # Dispatch the networks for evaluation
                net_ids = dcp(list(cn.Net.Ecosystem.keys()))

                while (len(Conf.Workers) > 0 and
                       len(net_ids) > 0):

                    # Wait for a free worker
                    while (len(Conf.Workers) > 0 and
                           len(free_workers) == 0):
                        manage_workers(free_workers)

                    if len(free_workers) > 0:
                        worker = free_workers.pop()
                        net_id = net_ids.pop()

                        # The worker receives the network plus a Conf
                        # instance built for this run/epoch (and for this
                        # worker when CUDA is in use).
                        package = (cn.Net.Ecosystem[net_id], Conf(run, epoch, worker if Conf.UseCuda else None))
                        comm.send(package, dest = worker, tag=Conf.Tag)

                # Wait for the last workers to finish
                while len(free_workers) < len(Conf.Workers):
                    manage_workers(free_workers)

                if len(Conf.Workers) > 0:

                    try:
                        # Compute the relative fitness of networks and species.
                        calibrate(run, epoch)

                        if epoch < Conf.Epochs:
                            # Evolve the ecosystem
                            evolve(run, epoch)

                            # Eliminate unfit networks and empty species.
                            if len(cn.Net.Ecosystem) > cn.Net.Max.Count:
                                cull()

                    except:
                        print('Caught exception in evolve()')
                        Conf.Tag = Tags.Exit
                        dump_exception()
                        break

            if os.path.exists(Conf.LogDir):
                with open(Conf.LogDir + '/config.txt', 'w+') as cfg_file:
                    print_conf(_file = cfg_file)

            # Save global statistics
            for key, stat in Conf.Stats.items():
                stat.title = key
                print(stat.as_str())
                save_stat(stat)

        Conf.Tag = Tags.Exit

        print('\n\n>>> Sending exit command to workers...')

        for worker in Conf.Workers:
            comm.send(None, dest = worker, tag=Conf.Tag)

    else:

        # Worker process: announce readiness, then serve requests from rank 0.
        comm.send(None, dest=0, tag=Tags.Ready)

        while True:

            while not comm.iprobe(source=0):
                time.sleep(0.1)

            package = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
            tag = status.Get_tag()

            if tag == Tags.Start:

                net, conf = package[0], package[1]

                try:
                    # The evaluated network is sent back to the master.
                    conf.evaluator(net, conf)
                    comm.send(net, dest=0, tag = Tags.Done)

                except Exception:
                    print(f'Caught exception in worker {rank} while evaluating network {net.ID}')
                    print(net.as_str(_parameters = True))
                    dump_exception()
                    break

            elif tag == Tags.Exit:
                break

        # Tell the master this worker is gone, whatever the reason.
        comm.send(None, dest=0, tag=Tags.Exit)

        print(f'Worker {rank} exiting...')
def input(path, nb_mpi):
    """
    Simulate some random current input

    Opens an MPI port, writes its name to ``path`` so the peer can connect,
    accepts the connection and then serves requests until the peer signals
    the end of the simulation.  The tag of each incoming message selects the
    action: ``0`` sends a batch of random sorted values, ``1`` marks the end
    of a run, ``2`` ends the simulation; any other tag is printed and stops
    the loop.  On exit the connection and port are closed, the port file is
    removed and MPI is finalized.

    :param path: the file for the configurations of the connection
    :param nb_mpi: number of mpi rank for testing multi-threading and MPI simulation
    :return: None
    """
    # Start communication channels
    path_to_files = path
    # For NEST
    # Init connection
    print("Waiting for port details")
    info = MPI.INFO_NULL
    root = 0
    port = MPI.Open_port(info)
    # Publish the port name; the context manager closes the file even if the
    # write fails.
    with open(path_to_files, "w+") as fport:
        fport.write(port)
    print('wait connection ' + port)
    sys.stdout.flush()
    comm = MPI.COMM_WORLD.Accept(port, info, root)
    print('connect to ' + port)

    # test one rate
    status_ = MPI.Status()
    check = np.empty(1, dtype='b')
    starting = 1  # lower bound of the values generated for the current run
    count = 0     # number of end-of-run messages seen so far
    while True:
        comm.Recv([check, 1, MPI.CXX_BOOL], source=MPI.ANY_SOURCE,
                  tag=MPI.ANY_TAG, status=status_)
        print(" start to send")
        sys.stdout.flush()
        print(" status a tag ", status_.Get_tag())
        sys.stdout.flush()
        if status_.Get_tag() == 0:
            # receive list ids
            size_list = np.empty(1, dtype='i')
            comm.Recv([size_list, 1, MPI.INT], source=status_.Get_source(),
                      tag=0, status=status_)
            if size_list[0] != 0:
                print("size list id", size_list)
                sys.stdout.flush()
                list_id = np.empty(size_list, dtype='i')
                comm.Recv([list_id, size_list, MPI.INT],
                          source=status_.Get_source(), tag=0, status=status_)
                print(" id ", list_id)
                sys.stdout.flush()
                print(" source " + str(status_.Get_source()))
                sys.stdout.flush()
                # An even number (0..98) of sorted values, rounded to one
                # decimal, drawn from [starting, starting + 200).
                shape = np.random.randint(0, 50, 1, dtype='i') * 2
                data = starting + np.random.rand(shape[0]) * 200
                data = np.around(np.sort(np.array(data, dtype='d')),
                                 decimals=1)
                # The size is sent twice, then the values; both replies are
                # tagged with the first received id.
                send_shape = np.array(np.concatenate([shape, shape]),
                                      dtype='i')
                comm.Send([send_shape, MPI.INT], dest=status_.Get_source(),
                          tag=list_id[0])
                print(" shape data ", shape)
                sys.stdout.flush()
                comm.Send([data, MPI.DOUBLE], dest=status_.Get_source(),
                          tag=list_id[0])
                print(" send data", data)
                sys.stdout.flush()
        elif status_.Get_tag() == 1:
            print("end run")
            sys.stdout.flush()
            # Advance the value window once per group of nb_mpi messages.
            if count % nb_mpi == 0:
                starting += 200
            count += 1
        elif (status_.Get_tag() == 2):
            # One ending message has already arrived; drain the others.
            for i in range(nb_mpi - 1):
                print(" receive ending")
                sys.stdout.flush()
                comm.Recv([check, 1, MPI.CXX_BOOL], source=MPI.ANY_SOURCE,
                          tag=MPI.ANY_TAG, status=status_)
            print(" receive ending")
            sys.stdout.flush()
            print("end simulation")
            sys.stdout.flush()
            print("ending time : ", starting)
            break
        else:
            print(status_.Get_tag())
            break
    comm.Disconnect()
    MPI.Close_port(port)
    os.remove(path_to_files)
    print('exit')
    MPI.Finalize()
def __del__(self):
    """Finalize MPI when this object is destroyed, if MPI is still active.

    Finalizing is only valid once, after initialization.  Because a
    destructor can run at unpredictable times (including interpreter
    shutdown, or for a second instance), the call is guarded so it becomes a
    no-op when MPI was never initialized or has already been finalized.
    """
    if MPI.Is_initialized() and not MPI.Is_finalized():
        MPI.Finalize()
def enum(*sequential, **named):
    """Handy way to fake an enumerated type in Python

    Positional names are numbered 0, 1, 2, ... in the order given; keyword
    arguments supply explicit values.  Returns a new class called ``Enum``
    whose class attributes are those names.

    http://stackoverflow.com/questions/36932/how-can-i-represent-an-enum-in-python
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


# Define MPI message tags
tags = enum('READY', 'DONE', 'EXIT', 'START')

# Initializations and preliminaries
comm = MPI.COMM_WORLD  # get MPI communicator object
size = comm.size  # total number of processes
rank = comm.rank  # rank of this process
status = MPI.Status()  # get MPI status object
num_workers = size - 1  # every rank except rank 0 (the master, see below)
t0 = time.time()
m_s = 0.376176
npart_max = 400

# Opened by every rank in append mode through the MPI-IO ('mpio') driver.
hdf = h5py.File('mc-data-all-parallel.hdf5', 'a', driver='mpio',
                comm=MPI.COMM_WORLD)

if rank == 0:
    # Master
    print("Monte-Carlo simulations of runaway-merger IMBH formation\n")
    print("Master starting with %d workers" % num_workers)
def parallel_jacobi_rotate(comm, A, ind_j, ind_k):
    """Apply one Jacobi rotation to rows/columns ``ind_j`` and ``ind_k`` of A.

    Rank 0 computes the rotation coefficients ``c`` and ``s`` and broadcasts
    them together with rows j and k.  The odd ranks then compute the rotated
    row j and the even non-zero ranks the rotated row k, each group splitting
    the row between its members; the group leaders (world ranks 1 and 2)
    send the assembled rows back to rank 0, which writes them into ``A``.

    The row buffers are float64 (``np.zeros`` default), so they are
    transferred as ``MPI.DOUBLE``.  Describing them as ``MPI.FLOAT`` would
    move only half of the bytes of each row.

    :param comm: communicator with at least three processes (rank 0 receives
        from ranks 1 and 2).
    :param A: square symmetric matrix; only rank 0's copy is read and
        updated.
    :param ind_j: index of the first pivot row/column (used on rank 0).
    :param ind_k: index of the second pivot row/column (used on rank 0).
    :return: ``A`` (modified in place on rank 0, untouched elsewhere).

    NOTE(review): the Scatter/Gather calls assume ``A.shape[0]`` is divisible
    by the size of each worker group -- confirm against the caller.
    """
    sz = A.shape[0]
    rank = comm.Get_rank()
    pool_size = comm.Get_size()

    c = s = 0.0
    j = k = 0
    row_j, row_k = np.zeros(sz), np.zeros(sz)

    if rank == 0:
        j, k = ind_j, ind_k
        if A[j, j] == A[k, k]:
            # Equal diagonal entries: rotate by exactly pi/4.
            c = np.cos(np.pi / 4)
            s = np.sin(np.pi / 4)
        else:
            tau = (A[j, j] - A[k, k]) / (2 * A[j, k])
            t = sign(tau) / (abs(tau) + np.sqrt(1 + tau ** 2))
            c = 1 / np.sqrt(1 + t ** 2)
            s = c * t

        for i in range(sz):
            row_j[i] = A[j, i]
            row_k[i] = A[k, i]

    j = comm.bcast(j, root=0)
    k = comm.bcast(k, root=0)
    c = comm.bcast(c, root=0)
    s = comm.bcast(s, root=0)
    comm.Bcast(row_j, root=0)
    comm.Bcast(row_k, root=0)

    # Odd ranks handle row j, even non-zero ranks handle row k.
    row_j_comm = comm.Create_group(comm.group.Incl(
        [i for i in range(1, pool_size) if i % 2 == 1]))
    row_k_comm = comm.Create_group(comm.group.Incl(
        [i for i in range(1, pool_size) if i % 2 == 0]))

    row_j_new = np.zeros(sz)
    if MPI.COMM_NULL != row_j_comm:
        row_j_rank = row_j_comm.Get_rank()
        row_j_size = row_j_comm.Get_size()
        size = int(sz / row_j_size)
        row_j_part = np.zeros(size)
        row_k_part = np.zeros(size)
        row_j_new_part = np.zeros(size)

        row_j_comm.Scatter(row_j, row_j_part, root=0)
        row_j_comm.Scatter(row_k, row_k_part, root=0)

        for i in range(size):
            row_j_new_part[i] = c * row_j_part[i] + s * row_k_part[i]

        row_j_comm.Gather(row_j_new_part, row_j_new, root=0)
        if row_j_rank == 0:
            comm.Send([row_j_new, sz, MPI.DOUBLE], dest=0, tag=0)
        row_j_comm.Free()

    row_k_new = np.zeros(sz)
    if MPI.COMM_NULL != row_k_comm:
        row_k_rank = row_k_comm.Get_rank()
        row_k_size = row_k_comm.Get_size()
        size = int(sz / row_k_size)
        row_j_part = np.zeros(size)
        row_k_part = np.zeros(size)
        row_k_new_part = np.zeros(size)

        row_k_comm.Scatter(row_j, row_j_part, root=0)
        row_k_comm.Scatter(row_k, row_k_part, root=0)

        for i in range(size):
            row_k_new_part[i] = s * row_j_part[i] - c * row_k_part[i]

        row_k_comm.Gather(row_k_new_part, row_k_new, root=0)
        if row_k_rank == 0:
            comm.Send([row_k_new, sz, MPI.DOUBLE], dest=0, tag=0)
        row_k_comm.Free()

    if rank == 0:
        status = MPI.Status()
        # World rank 1 leads the row-j group, world rank 2 the row-k group.
        comm.Recv([row_j_new, sz, MPI.DOUBLE], source=1, tag=0, status=status)
        comm.Recv([row_k_new, sz, MPI.DOUBLE], source=2, tag=0, status=status)

        # The four pivot entries are computed directly from the old rows.
        A[j, k] = (c ** 2 - s ** 2) * row_j[k] + s * c * (row_k[k] - row_j[j])
        A[k, j] = A[j, k]
        A[j, j] = c ** 2 * row_j[j] + 2 * s * c * row_j[k] + s ** 2 * row_k[k]
        A[k, k] = s ** 2 * row_j[j] - 2 * s * c * row_j[k] + c ** 2 * row_k[k]

        # Everything else in rows/columns j and k comes from the workers;
        # the matrix is kept symmetric.
        for i in range(sz):
            if i != j and i != k:
                A[j, i] = row_j_new[i]
                A[k, i] = row_k_new[i]
                A[i, j] = A[j, i]
                A[i, k] = A[k, i]

    return A
num_nodes = comm.Get_size() # Get their rank rank = comm.Get_rank() # Print a start checkpoint if I am the master node if rank == 0: print("Start", flush=True) # Compute number of trials per node trials_per_node = np.ceil(total_num_trials / num_nodes).astype(int) # Sync all nodes here to start a 'more accurate' time measurement comm.Barrier() # Init send (i.e. elapsed_buff) and receive (i.e. longest_elapsed_buff) buffer for MAX reduction # as a one dimensional zero-filled arrays elapsed_buff = np.zeros(1, dtype=np.float64) # longest_elapsed_buff = np.zeros(1,dtype=np.float64) # Get start time start = MPI.Wtime() # Compute (x,y) random points which fall inside the circle points_in_circle_per_node = random_points_generator(trials_per_node) # Record the time when the function is done finish = MPI.Wtime() # Compute the time delta elapsed_buff[0] = (finish - start) # Each processor prints its wall-clock time print("Processor {0} finished in {1:.6f}s.".format(rank, elapsed_buff[0]), flush=True) # Do a MAX reduction to record the slowest processor # comm.Reduce([elapsed_buff, MPI.DOUBLE],[longest_elapsed_buff, MPI.DOUBLE],op=MPI.MAX,root=0) # Get rid of the array, just need a number # longest_elapsed = longest_elapsed_buff[0] # Init send (i.e. points_in_buff) and receive (i.e. tot_points_in_buff) buffer # for SUM reduction
def _next_pending(queue, done):
    """Pop jobs from *queue* until one whose key is not in *done*; None if none left."""
    job = pop_left(queue)
    while job is not None and list(job.keys())[0] in done:
        job = pop_left(queue)
    return job


def _record_done(flag, done, rs_fname):
    """Append the finished job's key to *done* and persist it when resuming is enabled."""
    done.append(list(flag.keys())[0])
    if suspend_resume_enabled:
        with open(rs_fname, "wb") as rs_file:
            pickle.dump(done, rs_file)


def _receive_finished(done, rs_fname):
    """Wait for any worker to report a finished job, record it, return the worker's rank."""
    status = MPI.Status()
    flag = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
    assert (
        status.tag == 0
    ), "Wrong tag: a message signalling a finished task expected"
    _record_done(flag, done, rs_fname)
    return status.source


def main(queue, rs_fname):
    """
    Dispatch the work among processors.

    Seeds each worker rank with one job, then hands out a new job every time
    a worker reports completion, waits for the remaining results, and
    finally sends every worker the shutdown tag and collects its
    acknowledgement.  Jobs whose key is already listed in the resume file
    are skipped.

    Input:
        queue = list of job inputs (each a dict whose first key identifies
                the job)
        rs_fname = path of the pickle file holding the keys of finished jobs;
                   read at start-up, rewritten after every finished job and
                   removed at the end, all only when suspend/resume is
                   enabled
    """

    n_tasks = len(queue)

    if suspend_resume_enabled:
        # The resume file is written by this program itself (see
        # _record_done); do not point it at untrusted data.
        with open(rs_fname, "rb") as rs_file:
            done = pickle.load(rs_file)
    else:
        done = []

    WORKTAG = 1
    DIETAG = 2
    nprocs = comm.Get_size()

    # Phase 1: give every worker (at most one per job) its first job.
    for rank in range(1, min(len(queue) + 1, nprocs)):
        input_file = _next_pending(queue, done)
        if input_file is None:
            break
        comm.send(input_file, dest=rank, tag=WORKTAG)
    print("MASTER: first loop terminated")

    # Phase 2: as workers finish, hand them the next pending job.
    while queue:
        input_file = _next_pending(queue, done)
        if input_file is None:
            break
        worker = _receive_finished(done, rs_fname)
        comm.send(input_file, dest=worker, tag=WORKTAG)
    print("MASTER: second loop terminated")

    # Phase 3: collect the results that are still outstanding.
    while len(done) < n_tasks:
        _receive_finished(done, rs_fname)
    print("MASTER: third loop terminated")

    # Shut the workers down and wait for each one to acknowledge.
    status = MPI.Status()
    for rank in range(1, nprocs):
        comm.send(0, dest=rank, tag=DIETAG)
    for rank in range(1, nprocs):
        comm.recv(source=rank, tag=1, status=status)

    if suspend_resume_enabled:
        os.remove(rs_fname)
    return
def __init__(self, comm, odir, pid, ndims, L, N, method):
    """Set up the global domain, this rank's subdomain and the output dir.

    :param comm: MPI communicator; ``rank``/``size`` drive the 1D
        decomposition along axis 0 and rank 0 creates ``odir``.
    :param odir: output directory; created on rank 0 if missing.
    :param pid: problem identifier, used (plus ``'-'``) as file prefix.
    :param ndims: number of spatial dimensions.
    :param L: domain length(s): a scalar, a length-1 iterable, or one value
        per dimension.
    :param N: grid size(s), same conventions as ``L``.
    :param method: ``'central_diff'``, ``'spline_flux_diff'`` or
        ``'ignore'``; anything else falls back to the Akima spline method
        with a warning printed on rank 0.
    :raises IndexError: if ``L`` or ``N`` is iterable with a length other
        than 1 or ``ndims``.

    If the output directory cannot be created, every rank finalizes MPI and
    exits with status 999.
    """

    # "Protected" variables masked by property method
    self._odir = odir
    self._pid = pid
    self._comm = comm
    self._ndims = ndims
    self._config = "Unknown (Base Configuration)"
    self._periodic = [False] * ndims

    # "Public" variables
    self.tol = 1.0e-6
    self.prefix = pid + '-'

    # Global domain variables
    # (builtin int/float are what the removed np.int/np.float aliases meant)
    if np.iterable(N):
        if len(N) == 1:
            self._nx = np.array(list(N) * ndims, dtype=int)
        elif len(N) == ndims:
            self._nx = np.array(N, dtype=int)
        else:
            raise IndexError("The length of N must be either 1 or ndims")
    else:
        self._nx = np.array([N] * ndims, dtype=int)

    if np.iterable(L):
        if len(L) == 1:
            self._L = np.array(list(L) * ndims)
        elif len(L) == ndims:
            self._L = np.array(L)
        else:
            raise IndexError("The length of L must be either 1 or ndims")
    else:
        self._L = np.array([L] * ndims, dtype=float)

    self.dx = self._L / self._nx
    self.Nx = self._nx.prod()

    # Local subdomain variables (1D Decomposition)
    self.nnx = self._nx.copy()
    self.ixs = np.zeros(ndims, dtype=int)
    self.ixe = self._nx.copy()

    self.nnx[0] = self._nx[0] // self.comm.size
    self.ixs[0] = self.nnx[0] * self.comm.rank
    self.ixe[0] = self.ixs[0] + self.nnx[0]

    # eventually add other subdomain decompositions

    # MAKE ODIR, CHECKING IF IT IS A VALID PATH.
    # Rank 0 never raises here: the outcome is always broadcast so that all
    # ranks leave together instead of the others waiting forever in bcast.
    if comm.rank == 0:
        failure = None
        try:
            os.makedirs(odir)
        except OSError as e:
            # Also raised when odir already exists; isdir decides below.
            failure = e

        if os.path.isdir(odir):
            status = 0
        else:
            status = None
            print("Could not create output directory %r: %s"
                  % (odir, failure))
    else:
        status = None

    status = comm.bcast(status)
    if status != 0:
        MPI.Finalize()
        sys.exit(999)

    self.mpi_moments_file = '%s%s.moments' % (self.odir, self.prefix)

    if method == 'central_diff':
        self.deriv = self._centdiff_deriv
    elif method == 'spline_flux_diff':
        self.deriv = self._akima_deriv
    elif method == 'ignore':
        self.deriv = None
    else:
        if comm.rank == 0:
            print("mpiAnalyzer._baseAnalyzer.__init__(): "
                  "'method' argument not recognized!\n"
                  "Defaulting to Akima spline flux differencing.")
        self.deriv = self._akima_deriv
fname = os.path.join(prefix, 'performance_profile.dat') fp = open(fname, 'w') # Iterate over all the trusses index = 0 for vals in trusses: # Set the values of N/M N = vals[0] M = vals[1] print('Optimizing truss (%d x %d) ...' % (N, M)) # Optimize each of the trusses truss = setup_ground_struct(N, M) t0 = MPI.Wtime() if optimizer is 'None': opt = paropt_truss(truss, prefix=prefix, use_tr=use_tr, use_hessian=use_hessian) # Get the optimized point x, z, zw, zl, zu = opt.getOptimizedPoint() else: # Read out the options from the dictionary of options options = all_options[optimizer] # Set the output file filename = os.path.join(prefix, 'output_%dx%d.out' % (N, M)) options[outfile_name] = filename
elapsed_time += MPI.Wtime() norm = comm.bcast(norm, root=0) i, j = indexes_max_elem(A) k += 1 return np.diag(A).tolist() if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--input', required=True, help='Input file') parser.add_argument('--output', required=True, help='Output file') args = parser.parse_args() elapsed_time = 0 need_args = ('matrix', 'eps') init_dict = read_data(args.input, need_args) A, eps = init_dict['matrix'], init_dict['eps'] comm = MPI.COMM_WORLD rank = comm.Get_rank() elapsed_time -= MPI.Wtime() eig = jacobi_parallel(comm, A, eps) elapsed_time += MPI.Wtime() if rank == 0: save_to_file(args.output, eigenvalues=eig) print("Dimension {0}, time elapsed {1}\n".format(A.shape[0], elapsed_time)) MPI.Finalize()
if __name__ == '__main__': for i in range(no_chains + no_helpers): print("Joining",i) jobs[i].join() ## FINISH SAMPLING PROCESS print('Master finished') shared_queue_solver.cancel_join_thread() shared_queue_surrogate.cancel_join_thread() shared_queue_updater.cancel_join_thread() shared_queue_surrogate_solver.cancel_join_thread() print("Queues empty?", shared_queue_solver.empty(), shared_queue_surrogate.empty(), shared_queue_updater.empty(), shared_queue_surrogate_solver.empty()) if rank_world in group_leader_ids: print("Hello, I am group leader with local rank", comm_local.Get_rank(), "and global rank", rank_world ) finish = 0 status = MPI.Status() data_par = np.zeros(no_parameters) data_obs = np.zeros(no_observations)+rank_world while finish == 0: comm_world.Recv(data_par, source=0, tag=MPI.ANY_TAG, status=status) tag = status.Get_tag() if tag == 150: print("BYE BYE from rank_world", rank_world) finish = 1 else: # x = data_par[0] # y = data_par[1] # data_obs[0]=pow(pow(x,2)+y-11,2)+pow(x+pow(y,2)-7,2) # a = data_par[0] # b = data_par[1]
# PartitionNum = 20 # DataPath = '/home/mapred/GraphData/uk/subdata/' VertexNum = 787803000 PartitionNum = 3000 # DataPath = '/home/mapred/GraphData/twitter/subdata/' # VertexNum = 41652250 # PartitionNum = 50 GraphInfo = (DataPath, VertexNum, PartitionNum, VertexNum / PartitionNum) test_graph = satgraph() rank_0_host = None if MPI.COMM_WORLD.Get_rank() == 0: rank_0_host = MPI.Get_processor_name() rank_0_host = MPI.COMM_WORLD.bcast(rank_0_host, root=0) test_graph.set_Dtype_All(Dtype_All) test_graph.set_GraphInfo(GraphInfo) test_graph.set_IP(rank_0_host) test_graph.set_port(18086, 18087) test_graph.set_ThreadNum(4) test_graph.set_MaxIteration(100) test_graph.set_StaleNum(3) # test_graph.set_FilterThreshold(0) test_graph.set_FilterThreshold(0.00000001) test_graph.set_CalcFunc(calc_pagerank) test_graph.run('pagerank') os._exit(0)
def testGetProcessorName(self):
    """The processor name reported by MPI must be a ``str``."""
    name_is_str = isinstance(MPI.Get_processor_name(), str)
    self.assertTrue(name_is_str)
def proc_work(twx_cfg, start_ymd, end_ymd, params_ppca, rank):
    """Run the worker loop that infills tmin/tmax for stations sent by the coordinator.

    The worker first takes part in a broadcast rooted at ``RANK_COORD`` that
    delivers the pair ``(stnids_tmin, stnids_tmax)``. It then repeatedly
    receives a station id from ``RANK_COORD``, calls ``infill_tair`` for each
    element whose id collection contains that station, sends one 7-item
    result message per element to ``RANK_WRITE`` and finally sends its own
    rank back to ``RANK_COORD``.

    If infilling raises, every element requested for that station is
    reported with netCDF4 default fill values instead (both elements, even
    when only one of them failed).

    Args:
        twx_cfg: Configuration object; ``fpath_stndata_nc_tair_homog`` and
            ``path_reanalysis_namerica`` are read from it.
        start_ymd: Start of the period, passed with ``end_ymd`` as a pair to
            ``StationDataDb`` and ``NNRNghData``.
        end_ymd: End of the period.
        params_ppca: Mapping handed to ``infill_tair``; its
            ``'min_daily_nnghs'`` entry is logged at start-up.
        rank: This worker's rank, used in log messages and sent to the
            coordinator after each station.

    Returns:
        0 once a message tagged ``TAG_STOPWORK`` has been received.
    """
    status = MPI.Status()

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog, (start_ymd, end_ymd))
    # Keep the day count integral: it is only used as an array shape, and
    # NumPy rejects float shapes.
    ndays = int(stn_da.days.size)

    # Placeholder results used when infilling fails for a station.
    fill_f4 = netCDF4.default_fillvals['f4']
    empty_fill = np.ones(ndays, dtype=np.float32) * fill_f4
    empty_flags = np.ones(ndays, dtype=np.int8) * netCDF4.default_fillvals['i1']
    # Same order as an infill_tair result: fnl, fill_mask, infill, mae, bias.
    empty_results = (empty_fill, empty_flags, empty_fill, fill_f4, fill_f4)

    ds_nnr = NNRNghData(twx_cfg.path_reanalysis_namerica, (start_ymd, end_ymd))

    mths = np.arange(1, 13)
    mth_masks = [stn_da.days[MONTH] == mth for mth in mths]
    vnames_mean_tmin = [get_mean_varname('tmin', mth) for mth in mths]
    vnames_vari_tmin = [get_variance_varname('tmin', mth) for mth in mths]
    vnames_mean_tmax = [get_mean_varname('tmax', mth) for mth in mths]
    vnames_vari_tmax = [get_variance_varname('tmax', mth) for mth in mths]

    stnids_tmin, stnids_tmax = MPI.COMM_WORLD.bcast(None, root=RANK_COORD)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("WORKER {0}: Received broadcast msg".format(rank))
    print("WORKER {0}: Minimum number of station neighbors for infilling: {1}".format(
        rank, params_ppca['min_daily_nnghs']))

    # (element name, station ids to infill, mean varnames, variance varnames)
    elem_cfgs = (
        ('tmin', stnids_tmin, vnames_mean_tmin, vnames_vari_tmin),
        ('tmax', stnids_tmax, vnames_mean_tmax, vnames_vari_tmax),
    )

    while True:
        stn_id = MPI.COMM_WORLD.recv(source=RANK_COORD, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:
            # Forward the stop signal to the writer; the payload has the same
            # length as a regular result message.
            MPI.COMM_WORLD.send([None] * 7, dest=RANK_WRITE, tag=TAG_STOPWORK)
            print("WORKER {0}: Finished".format(rank))
            return 0

        # Decide what to run before entering the try block so the error
        # handler never sees stale or unbound flags.
        to_run = [cfg for cfg in elem_cfgs if stn_id in cfg[1]]

        try:
            results = {}
            for elem, _, vnames_mean, vnames_vari in to_run:
                fnl, fill_mask, infill, mae, bias = infill_tair(
                    stn_id, stn_da, elem, ds_nnr, vnames_mean, vnames_vari,
                    mth_masks, params_ppca)
                results[elem] = (fnl, fill_mask, infill, mae, bias)
        except Exception as e:
            # Deliberate best-effort: a failing station must not kill the
            # worker. All requested elements fall back to fill values.
            print("ERROR: Could not infill {0}|{1}".format(stn_id, e))
            results = dict((cfg[0], empty_results) for cfg in to_run)

        for cfg in to_run:
            elem = cfg[0]
            MPI.COMM_WORLD.send((stn_id, elem) + results[elem],
                                dest=RANK_WRITE, tag=TAG_DOWORK)

        # Tell the coordinator this worker is ready for the next station.
        MPI.COMM_WORLD.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)