def run_mpi_sim(args, inputfile, usernamespace, optparams=None):
    """
    Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each
    model parallelised using either OpenMP (CPU) or CUDA (GPU)

    Args:
        args (dict): Namespace with command line arguments
        inputfile (object): File object for the input file.
        usernamespace (dict): Namespace that can be accessed by user in any
                Python code blocks in input file.
        optparams (dict): Optional argument. For Taguchi optimisation it
                provides the parameters to optimise and their values.
    """

    from mpi4py import MPI

    status = MPI.Status()
    hostname = MPI.Get_processor_name()

    # Set range for number of models to run
    modelstart = args.restart if args.restart else 1
    modelend = modelstart + args.n
    numbermodelruns = args.n

    # Command line flag used to indicate a spawned worker instance
    workerflag = '--mpi-worker'
    numworkers = args.mpi - 1

    ##################
    # Master process #
    ##################
    if workerflag not in sys.argv:
        # N.B Spawned worker flag (--mpi-worker) applied to sys.argv when MPI.Spawn is called

        # Get MPI communicator object either through argument or just get comm_world
        if hasattr(args, 'mpicomm'):
            comm = args.mpicomm
        else:
            comm = MPI.COMM_WORLD
        size = comm.Get_size()  # total number of processes
        rank = comm.Get_rank()  # rank of this process

        tsimstart = perf_counter()
        print('MPI master ({}, rank {}) on {} using {} workers\n'.format(
            comm.name, rank, hostname, numworkers))

        # Assemble a sys.argv replacement to pass to spawned worker
        # N.B This is required as sys.argv not available when gprMax is called via api()
        # Ignore mpicomm object if it exists as only strings can be passed via spawn
        myargv = []
        for key, value in vars(args).items():
            if value:
                if 'inputfile' in key:
                    myargv.append(value)
                elif 'gpu' in key:
                    myargv.append('-' + key)
                    if not isinstance(value, list):
                        myargv.append(str(value.deviceID))
                elif 'mpicomm' in key:
                    pass
                elif '_' in key:
                    key = key.replace('_', '-')
                    myargv.append('--' + key)
                    myargv.append(str(value))
                else:
                    myargv.append('-' + key)
                    myargv.append(str(value))

        # Create a list of work
        worklist = []
        for model in range(modelstart, modelend):
            workobj = dict()
            workobj['currentmodelrun'] = model
            if optparams:
                workobj['optparams'] = optparams
            worklist.append(workobj)
        # Add stop sentinels
        worklist += ([StopIteration] * numworkers)

        # Spawn workers
        newcomm = comm.Spawn(sys.executable,
                             args=['-m', 'gprMax'] + myargv + [workerflag],
                             maxprocs=numworkers)

        # Reply to whoever asks until done
        for work in worklist:
            newcomm.recv(source=MPI.ANY_SOURCE, status=status)
            newcomm.send(obj=work, dest=status.Get_source())

        # Shutdown communicators
        newcomm.Disconnect()

        tsimend = perf_counter()
        simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format(
            datetime.timedelta(seconds=tsimend - tsimstart))
        print('{} {}\n'.format(
            simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))

    ##################
    # Worker process #
    ##################
    elif workerflag in sys.argv:
        # Connect to parent to get communicator
        try:
            comm = MPI.Comm.Get_parent()
            rank = comm.Get_rank()
        except ValueError:
            raise ValueError('MPI worker (rank {}) could not connect to parent')

        # Ask for work until stop sentinel
        for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration):
            currentmodelrun = work['currentmodelrun']

            # Get info and setup device ID for GPU(s)
            gpuinfo = ''
            if args.gpu is not None:
                # Set device ID for multiple GPUs
                if isinstance(args.gpu, list):
                    deviceID = (rank - 1) % len(args.gpu)
                    args.gpu = next(gpu for gpu in args.gpu
                                    if gpu.deviceID == deviceID)
                gpuinfo = ' using {} - {}, {} RAM '.format(
                    args.gpu.deviceID, args.gpu.name,
                    human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True))

            # If Taguchi optimisation, add specific value for each parameter to
            # optimise for each experiment to user accessible namespace
            if 'optparams' in work:
                tmp = {}
                tmp.update((key, value[currentmodelrun - 1])
                           for key, value in work['optparams'].items())
                modelusernamespace = usernamespace.copy()
                modelusernamespace.update({'optparams': tmp})
            else:
                modelusernamespace = usernamespace

            # Run the model
            print('MPI worker (rank {}) starting model {}/{}{} on {}\n'.format(
                rank, currentmodelrun, numbermodelruns, gpuinfo, hostname))
            run_model(args, currentmodelrun, modelend - 1, numbermodelruns,
                      inputfile, modelusernamespace)

        # Shutdown
        comm.Disconnect()
from mpi4py import MPI
from bs4 import BeautifulSoup
import requests
import pandas as pd

# initialize MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
stat = MPI.Status()

# create dictionary to store date, rating, review_text values
ff_reviews = {'date': [], 'rating': [], 'review': []}

# 584 pages of reviews = ~11680 total reviews
for n in range(584):
    if n % size == rank:
        if n == 0:
            end = ''
        else:
            end = '?start=' + str(n * 20)
        url = 'https://www.yelp.com/biz/founding-farmers-dc-washington-4' + end
        response = requests.get(url)
        html_soup = BeautifulSoup(response.text, 'html.parser')
        info = html_soup.find_all('div', {'class': 'review-content'})
        for i in info:
            review_text = i.find('p', {'lang': 'en'}).text.strip()
            ff_reviews['review'].append(review_text)
from mpi4py import MPI
from bs4 import BeautifulSoup
import requests

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
status = MPI.Status()

domains = open("domains.txt", "r").readlines()
workers = size - 1

if rank == 0:
    data = None
    while workers > 0:
        comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        worker = status.Get_source()
        print("[Dispatch] contacted by worker %d" % worker)
        if domains:
            print("[Dispatch] gives work to worker %d." % worker)
            data = domains.pop().rstrip()
            comm.send(data, dest=worker, tag=1)
        else:
            print("[Dispatch] retires worker %d." % worker)
            comm.send(data, dest=worker, tag=9)
            workers -= 1
else:
    data = None
    while True:
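        # --- Assumed continuation: the original snippet is truncated at this point. ---
        # The lines below sketch how the worker loop could continue under the
        # dispatcher's protocol above (tag 1 = work, tag 9 = retire); the per-domain
        # fetch/parse step is a guess based on the requests/BeautifulSoup imports,
        # not taken from the original code.
        comm.send(data, dest=0, tag=0)              # report in / hand back the last result
        item = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        if status.Get_tag() == 9:                   # dispatcher has retired this worker
            break
        response = requests.get("http://" + item, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        data = (item, soup.title.string if soup.title else None)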
def correct_beta_z0(self): # Send coordinate updates to neighbors for all nonzero coordinates in # z0 msg_send, msg_recv = [0] * self.n_workers, [0] * self.n_workers for k0, *pt0 in zip(*self.z0.nonzero()): # Notify neighboring workers of the update if needed. pt_global = self.workers_segments.get_global_coordinate( self.rank, pt0) workers = self.workers_segments.get_touched_segments( pt=pt_global, radius=np.array(self.overlap) + 1) msg = np.array([k0, *pt_global, self.z0[(k0, *pt0)]], 'd') self.notify_neighbors(msg, workers) for i in workers: msg_send[i] += 1 n_init_done = 0 done_pt = set() no_msg, init_done = False, False mpi_status = MPI.Status() while not init_done: if n_init_done == self.n_workers: for i_worker in range(1, self.n_workers): self.notify_worker_status(constants.TAG_DICOD_INIT_DONE, i_worker=i_worker) init_done = True if not no_msg: if self.check_no_transitting_message(check_incoming=False): self.notify_worker_status(constants.TAG_DICOD_INIT_DONE) if self.rank == 0: n_init_done += 1 assert len(self.messages) == 0 no_msg = True if MPI.COMM_WORLD.Iprobe(status=mpi_status): tag = mpi_status.tag src = mpi_status.source if tag == constants.TAG_DICOD_INIT_DONE: if self.rank == 0: n_init_done += 1 else: init_done = True msg = np.empty(self.size_msg, 'd') MPI.COMM_WORLD.Recv([msg, MPI.DOUBLE], source=src, tag=tag) if tag == constants.TAG_DICOD_UPDATE_BETA: msg_recv[src] += 1 k0, *pt_global, dz = msg k0 = int(k0) pt_global = tuple([int(v) for v in pt_global]) pt0 = self.workers_segments.get_local_coordinate( self.rank, pt_global) pt_exist = self.workers_segments.is_contained_coordinate( self.rank, pt0, inner=False) if not pt_exist and (k0, *pt0) not in done_pt: done_pt.add((k0, *pt0)) self.coordinate_update(k0, pt0, dz, coordinate_exist=False) else: time.sleep(.001)
def run_mpi_sim(args, inputfile, usernamespace, optparams=None): """ Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI # Get name of processor/host name = MPI.Get_processor_name() # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n # Number of workers and command line flag to indicate a spawned worker worker = '--mpi-worker' numberworkers = args.mpi - 1 # Master process if worker not in sys.argv: tsimstart = perf_counter() print('MPI master rank (PID {}) on {} using {} workers'.format( os.getpid(), name, numberworkers)) # Create a list of work worklist = [] for model in range(modelstart, modelend): workobj = dict() workobj['currentmodelrun'] = model if optparams: workobj['optparams'] = optparams worklist.append(workobj) # Add stop sentinels worklist += ([StopIteration] * numberworkers) # Spawn workers comm = MPI.COMM_WORLD.Spawn( sys.executable, args=['-m', 'gprMax', '-n', str(args.n)] + sys.argv[1::] + [worker], maxprocs=numberworkers) # Reply to whoever asks until done status = MPI.Status() for work in worklist: comm.recv(source=MPI.ANY_SOURCE, status=status) comm.send(obj=work, dest=status.Get_source()) # Shutdown comm.Disconnect() tsimend = perf_counter() simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) # Worker process elif worker in sys.argv: # Connect to parent try: comm = MPI.Comm.Get_parent() # get MPI communicator object rank = comm.Get_rank() # rank of this process except ValueError: raise ValueError('Could not connect to parent') # Ask for work until stop sentinel for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration): currentmodelrun = work['currentmodelrun'] # Get info and setup device ID for GPU(s) gpuinfo = '' if args.gpu is not None: # Set device ID for multiple GPUs if isinstance(args.gpu, list): deviceID = (rank - 1) % len(args.gpu) args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID) gpuinfo = ' using {} - {}, {} RAM '.format( args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True)) print('MPI worker rank {} (PID {}) starting model {}/{}{} on {}'. format(rank, os.getpid(), currentmodelrun, numbermodelruns, gpuinfo, name)) # If Taguchi optimistaion, add specific value for each parameter to # optimise for each experiment to user accessible namespace if 'optparams' in work: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) # Shutdown comm.Disconnect()
""" MPI multiprocessing. """ import logging import os try: from mpi4py import MPI mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() mpi_status = MPI.Status() use_multiprocessing = mpi_comm.Get_size() >= 2 except: use_multiprocessing = False global_multiproc_dict = {} mpi_master = 0 class MPILogFile(MPI.File): def write(self, *args, **kwargs): self.Write_shared(*args, **kwargs) class MPIFileHandler(logging.FileHandler): "MPI file class for logging process communication." def __init__(self, filename, mode=MPI.MODE_WRONLY,
def wrapper_explore(graph: DiGraph, source_nodes: List[Any], target_nodes: List[Any], lmax: int, simple_paths: bool): """Calculate the effect of sources nodes over multiple target nodes on a directed graph. :param graph: directed graph :param source_nodes: iterable with sources nodes (usually drugs) :param target_nodes: iterable with target nodes (usually diseases) :param lmax: maximum length of the path allowed :param simple_paths: if true, only simple paths are calculated :return: effect of the source node on each target node """ _check_generic_input(graph, source_nodes, target_nodes) results_by_source: List = [] time_cache = defaultdict(dict) # Store history of the visited path for the given node previous_history = {} # Cycle History: # [0] Dict to store pre-calculated cycles # [1] Dict to store number of cycles from source to target cycle_history = [{}, {}] # Path count to all targets, by source count_by_source = {} cycles_by_source = {} # Get the reduced version of the graph and the node2id mapping reduced_graph, node2id = generate_reduced_graph(graph, target_nodes) # Initialize MPI environment and variables, if found number_of_processes = 1 process_id = 0 try: from mpi4py import MPI comm = MPI.COMM_WORLD process_id = comm.Get_rank() number_of_processes = comm.Get_size() except ImportError: pass _target_nodes = [node2id[target_node] for target_node in target_nodes] _source_nodes = [source_node for source_node in source_nodes] if process_id > 0 or number_of_processes == 1: source_index = 0 if process_id > 0: source_node = comm.recv(source=0, tag=process_id) else: source_node = _source_nodes[source_index] work_done = 0 while source_node != -1: # Get node identifiers _source_node = node2id[source_node] exe_t_0 = time.time() # Calculate all paths between source and target _, count = compute_all_paths_multitarget_dict( graph=reduced_graph, source=_source_node, targets=_target_nodes, lmax=lmax, previous_history=previous_history, cycle_history=cycle_history, simple_paths=simple_paths) # Cache time needed exe_t_f = time.time() time_cache[source_node] = exe_t_f - exe_t_0 count_by_source[source_node] = count cycles_by_source[source_node] = cycle_history[1] cycle_history[1] = {} if process_id > 0: comm.send(process_id, dest=0, tag=0) source_node = comm.recv(source=0, tag=process_id) else: source_index += 1 if source_index >= len(_source_nodes): source_node = -1 else: source_node = _source_nodes[source_index] work_done += 1 else: # process_id = 0 and number_of_processes > 1 free_workers = [worker for worker in range(1, number_of_processes)] status = MPI.Status() # Get MPI status object for source_index, source_node in tqdm(enumerate(source_nodes), total=len(source_nodes)): if free_workers: worker = free_workers.pop(0) comm.send(source_node, dest=worker, tag=worker) else: # Wait until a worker finishes worker = comm.recv(source=MPI.ANY_SOURCE, tag=0, status=status) req = comm.isend(source_node, dest=worker, tag=worker) # Wait until all workers have finished their work while len(free_workers) < number_of_processes - 1: worker = comm.recv(source=MPI.ANY_SOURCE, tag=0, status=status) free_workers.append(worker) # After all source nodes are processed, notify workers that there's no more work. 
for worker in range(1, number_of_processes): code = -1 comm.send(code, dest=worker, tag=worker) # Master (process_id == 0) receives partial results from other processes if process_id == 0 and number_of_processes > 1: # print(f'Results from master: {len(count_by_source)}') print( f'Waiting to receive partial results from {number_of_processes - 1} other processes.' ) status = MPI.Status() # Get MPI status object for i in range(number_of_processes - 1): partial_results = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) source = status.Get_source() print(f'Received {len(partial_results)} from process {source}.') count_by_source.update(partial_results[0]) cycles_by_source.update(partial_results[1]) # Order results by source id. This is only to make validation easier # for source_node in source_nodes: # results_by_source.append(recv_results[source_node]) elif number_of_processes > 1: print(f'[{process_id}] Sending results to master.') results = [count_by_source, cycles_by_source] comm.send(results, dest=0, tag=process_id) if process_id == 0: for target_index, target_node in enumerate(target_nodes): # target_id = node2id[target_node] for source_node in source_nodes: if source_node not in count_by_source: continue relative_activation, relative_inhibition, paths_to_target = count_by_source[ source_node][target_index] # number_of_cycles = 0 # if source_node in cycles_by_source: # if target_id in cycles_by_source[source_node]: # number_of_cycles = cycles_by_source[source_node][target_id] if not paths_to_target: continue results_by_source.append( dict( source=source_node, target=target_node, relative_activation=relative_activation, relative_inhibition=relative_inhibition, number_of_paths=paths_to_target, # number_of_cycles=number_of_cycles, )) if not results_by_source: logger.warning( 'There are no paths between the sources and any targets') return results_by_source, time_cache
def reduceToPrevious(self, context, reducer, func, dataList, *args, **kwargs): """!Reduction where work goes to the same target as before Work is distributed so that each slave handles the same indices in the dataList as when 'map' was called. This allows the right data to go to the right cache. It is assumed that the dataList is the same length as when it was passed to 'map'. The 'func' signature should be func(cache, data, *args, **kwargs). The 'reducer' signature should be reducer(old, new). If the 'reducer' is None, then we will return the full list of results @param context: Namespace for cache @param reducer: function for master to run to reduce slave results; or None @param func: function for slaves to run; must be picklable @param dataList: List of data to distribute to slaves; must be picklable @param args: List of constant arguments @param kwargs: Dict of constant arguments @return reduced result (if reducer is non-None) or list of results from applying 'func' to dataList """ if context is None: raise ValueError( "context must be set to map to same nodes as previous context") tags = Tags("result", "work") num = len(dataList) if self.size == 1 or num <= 1: # Can do everything here return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)), *args, **kwargs) if self.size == num: # We're shooting ourselves in the foot using dynamic distribution return self.reduceNoBalance(context, reducer, func, dataList, *args, **kwargs) self.command("mapToPrevious") # Send function self.log("instruct") self.comm.broadcast((tags, func, args, kwargs, context), root=self.root) requestList = self.comm.gather(None, root=self.root) self.log("listen", requestList) initial = [ dataList[index] if (index is not None and index >= 0) else None for index in requestList ] self.log("scatter jobs", initial) self.comm.scatter(initial, root=self.root) pending = min(num, self.size - 1) if reducer is None: output = [None] * num else: thread = ReductionThread(reducer) thread.start() while pending > 0: status = mpi.Status() index, result, nextIndex = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE) source = status.source self.log("gather from slave", source) if reducer is None: output[index] = result else: thread.add(result) if nextIndex >= 0: job = dataList[nextIndex] self.log("send job to slave", source) self.comm.send(job, source, tag=tags.work) else: pending -= 1 self.log("waiting on", pending) if reducer is not None: output = thread.join() self.log("done") return output
def boss(): comm = MPI.COMM_WORLD status = MPI.Status() num_workers = MPI.COMM_WORLD.Get_size() ############################################## # Collect information on available CFS forecasts # TODO: extend this for the full years begin_date = tools.string_to_date(str(config['historic_years_begin']) + '0101', h=False) end_date = tools.string_to_date(str(config['historic_years_end']) + '1231', h=False) # CFS has forecasts every 6 hours date_range_6h = pd.date_range(begin_date, end_date, freq='6H').to_pydatetime() cfs = cfs_tools.cfs_ftp_info() # Check which ones are available. After 2011 they are available every day, # every 6 hours. But reforecasts from 1982-2010 are only every 5th day date_range_6h = [d for d in date_range_6h if cfs.forecast_available(d)] # Each job consists of a file to download along with it's associated # initial time job_list = [] for d in date_range_6h: download_url = cfs.forecast_url_from_timestamp(forecast_time=d, protocal='http') job_list.append({'download_url': download_url, 'forecast_date': d}) cfs.close() num_jobs = len(job_list) #Dole out the first round of jobs to all workers for i in range(1, num_workers): job_info = job_list.pop() comm.send(obj=job_info, dest=i, tag=work_tag) #While there are new jobs to assign. #Collect results and assign new jobs as others are finished. results = [] while len(job_list) > 0: next_job_info = job_list.pop() job_result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) results.append(job_result) num_finished_jobs = len(results) print('completed: ' +str(num_finished_jobs)+'/'+str(num_jobs) +' , ' + \ str(job_result['forecast_date']) + ' ' + str(job_result) + ' ' + \ str(job_result['processing_time_min'])) comm.send(obj=next_job_info, dest=status.Get_source(), tag=work_tag) #Collect last jobs for i in range(1, num_workers): job_result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) results.append(job_result) #Shut down all workers for i in range(1, num_workers): comm.send(obj=None, dest=i, tag=stop_tag)
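# The worker loop that pairs with boss() above is not included in this snippet.
# A minimal sketch of what it could look like, assuming the module-level
# work_tag/stop_tag constants used by boss(); process_forecast() is a
# hypothetical stand-in for the real download/processing step.
def worker():
    import time
    comm = MPI.COMM_WORLD
    status = MPI.Status()
    while True:
        job_info = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        if status.Get_tag() == stop_tag or job_info is None:
            break
        start = time.time()
        process_forecast(job_info['download_url'], job_info['forecast_date'])  # hypothetical helper
        job_info['processing_time_min'] = (time.time() - start) / 60.0
        comm.send(obj=job_info, dest=0, tag=work_tag)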
def reduce(self, context, reducer, func, dataList, *args, **kwargs): """!Scatter work to slaves and reduce the results Work is distributed dynamically, so that slaves that finish quickly will receive more work. Each slave applies the function to the data they're provided. The slaves may optionally be passed a cache instance, which they can use to store data for subsequent executions (to ensure subsequent data is distributed in the same pattern as before, use the 'mapToPrevious' method). The cache also contains data that has been stored on the slaves. The 'func' signature should be func(cache, data, *args, **kwargs) if 'context' is non-None; otherwise func(data, *args, **kwargs). The 'reducer' signature should be reducer(old, new). If the 'reducer' is None, then we will return the full list of results @param context: Namespace for cache @param reducer: function for master to run to reduce slave results; or None @param func: function for slaves to run; must be picklable @param dataList: List of data to distribute to slaves; must be picklable @param args: List of constant arguments @param kwargs: Dict of constant arguments @return reduced result (if reducer is non-None) or list of results from applying 'func' to dataList """ tags = Tags("request", "work") num = len(dataList) if self.size == 1 or num <= 1: return self._reduceQueue(context, reducer, func, list(zip(list(range(num)), dataList)), *args, **kwargs) if self.size == num: # We're shooting ourselves in the foot using dynamic distribution return self.reduceNoBalance(context, reducer, func, dataList, *args, **kwargs) self.command("reduce") # Send function self.log("instruct") self.comm.broadcast((tags, func, reducer, args, kwargs, context), root=self.root) # Parcel out first set of data queue = list(zip(range(num), dataList)) # index, data output = [None] * num if reducer is None else None initial = [ None if i == self.rank else queue.pop(0) if queue else NoOp() for i in range(self.size) ] pending = min(num, self.size - 1) self.log("scatter initial jobs") self.comm.scatter(initial, root=self.rank) while queue or pending > 0: status = mpi.Status() report = self.comm.recv(status=status, tag=tags.request, source=mpi.ANY_SOURCE) source = status.source self.log("gather from slave", source) if reducer is None: index, result = report output[index] = result if queue: job = queue.pop(0) self.log("send job to slave", job[0], source) else: job = NoOp() pending -= 1 self.comm.send(job, source, tag=tags.work) if reducer is not None: results = self.comm.gather(None, root=self.root) output = None for rank in range(self.size): if rank == self.root: continue output = reducer( output, results[rank]) if output is not None else results[rank] self.log("done") return output
def reduceNoBalance(self, context, reducer, func, dataList, *args, **kwargs): """!Scatter work to slaves and reduce the results Work is distributed statically, so there is no load balancing. Each slave applies the function to the data they're provided. The slaves may optionally be passed a cache instance, which they can store data in for subsequent executions (to ensure subsequent data is distributed in the same pattern as before, use the 'mapToPrevious' method). The cache also contains data that has been stored on the slaves. The 'func' signature should be func(cache, data, *args, **kwargs) if 'context' is true; otherwise func(data, *args, **kwargs). The 'reducer' signature should be reducer(old, new). If the 'reducer' is None, then we will return the full list of results @param context: Namespace for cache @param reducer: function for master to run to reduce slave results; or None @param func: function for slaves to run; must be picklable @param dataList: List of data to distribute to slaves; must be picklable @param args: List of constant arguments @param kwargs: Dict of constant arguments @return reduced result (if reducer is non-None) or list of results from applying 'func' to dataList """ tags = Tags("result", "work") num = len(dataList) if self.size == 1 or num <= 1: return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)), *args, **kwargs) self.command("mapNoBalance") # Send function self.log("instruct") self.comm.broadcast((tags, func, args, kwargs, context), root=self.root) # Divide up the jobs # Try to give root the least to do, so it also has time to manage queue = list(zip(range(num), dataList)) # index, data if num < self.size: distribution = [[queue[i]] for i in range(num)] distribution.insert(self.rank, []) for i in range(num, self.size - 1): distribution.append([]) elif num % self.size == 0: numEach = num // self.size distribution = [ queue[i * numEach:(i + 1) * numEach] for i in range(self.size) ] else: numEach = num // self.size distribution = [ queue[i * numEach:(i + 1) * numEach] for i in range(self.size) ] for i in range(numEach * self.size, num): distribution[(self.rank + 1) % self.size].append distribution = list([] for i in range(self.size)) for i, job in enumerate(queue, self.rank + 1): distribution[i % self.size].append(job) # Distribute jobs for source in range(self.size): if source == self.rank: continue self.log("send jobs to ", source) self.comm.send(distribution[source], source, tag=tags.work) # Execute our own jobs output = [None] * num if reducer is None else None def ingestResults(output, nodeResults, distList): if reducer is None: for i, result in enumerate(nodeResults): index = distList[i][0] output[index] = result return output if output is None: output = nodeResults.pop(0) for result in nodeResults: output = reducer(output, result) return output ourResults = self._processQueue(context, func, distribution[self.rank], *args, **kwargs) output = ingestResults(output, ourResults, distribution[self.rank]) # Collect results pending = self.size - 1 while pending > 0: status = mpi.Status() slaveResults = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE) source = status.source self.log("gather from slave", source) output = ingestResults(output, slaveResults, distribution[source]) pending -= 1 self.log("done") return output
def backward_signal_kill(self, g, communicator, req_send_check, cur_step): mod_avail_index = len(self.full_modules) - 1 channel_index = self._init_channel_index - 2 mod_counters_ = [0] * len(self.full_modules) # should kill flag should_kill = False for i, output in reversed(list(enumerate(self.output))): ############################ killing process on workers ##################################### for _ in range(100): status = MPI.Status() communicator.Iprobe(0, 77, status) if status.Get_source() == 0: print("Worker {}, Cur Step: {} I'm the straggler, killing myself!".format(communicator.Get_rank(), cur_step)) tmp = communicator.recv(source=0, tag=77) should_kill = True break if should_kill: channel_index = -5 break ############################################################################################ if i == (len(self.output) - 1): # for last node, use g output.backward(g) # get gradient here after some sanity checks: tmp_grad = self.full_modules[mod_avail_index].weight.grad if not pd.isnull(tmp_grad): grads = tmp_grad.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) # update counters mod_avail_index -= 1 channel_index -= 1 else: continue else: if output.size() == self.input[i + 1].grad.size(): output.backward(self.input[i + 1].grad.data) else: tmp_grad_output = self.input[i + 1].grad.view(output.size()) output.backward(tmp_grad_output) # since in resnet we do not use bias weight for conv layer if pd.isnull(self.full_modules[mod_avail_index].bias): tmp_grad_weight = self.full_modules[mod_avail_index].weight.grad if not pd.isnull(tmp_grad_weight): grads = tmp_grad_weight.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] = 2 # update counters mod_avail_index -= 1 else: continue else: tmp_grad_weight = self.full_modules[mod_avail_index].weight.grad tmp_grad_bias = self.full_modules[mod_avail_index].bias.grad if not pd.isnull(tmp_grad_weight) and not pd.isnull(tmp_grad_bias): # we always send bias first if mod_counters_[mod_avail_index] == 0: grads = tmp_grad_bias.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] += 1 elif mod_counters_[mod_avail_index] == 1: grads = tmp_grad_weight.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] += 1 # update counters mod_avail_index -= 1 else: continue # handle the remaining gradients here to send to parameter server while channel_index >= 0: if pd.isnull(self.full_modules[mod_avail_index].bias): tmp_grad_weight = self.full_modules[mod_avail_index].weight.grad grads = tmp_grad_weight.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] = 2 # update counters mod_avail_index -= 1 else: tmp_grad_weight = self.full_modules[mod_avail_index].weight.grad tmp_grad_bias = self.full_modules[mod_avail_index].bias.grad # we always send bias first if mod_counters_[mod_avail_index] == 0: grads = tmp_grad_bias.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, 
MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] += 1 elif mod_counters_[mod_avail_index] == 1: grads = tmp_grad_weight.data.numpy().astype(np.float64) req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88 + channel_index) req_send_check.append(req_isend) channel_index -= 1 mod_counters_[mod_avail_index] += 1 # update counters mod_avail_index -= 1 if channel_index == -1: killed = False elif channel_index == -5: killed = True return req_send_check, killed
def controller(lower, upper): #Set up the basic MPI stuff comm = MPI.COMM_WORLD nproc = comm.Get_size() rank = comm.Get_rank() #Setup values for array of flags length = upper - lower flags = numpy.zeros(length) #Offset of last dispatched value current_val = 0 #Number of in-flight work packets inflight = 0 precheck_num = 0 #How many primes to process precheck_to = 20 #Arrays holding data per worker: #Value last sent to worker vals_in_use = numpy.zeros(nproc - 1) #Workers stats - how many processed in how long processed = numpy.zeros(nproc - 1) start_time = numpy.zeros(nproc - 1) cum_time = numpy.zeros(nproc - 1) end_time = numpy.zeros(nproc - 1) #Some things need to have the correct type BEFORE the MPI calls info = MPI.Status() request = MPI.Request() while True: #Use non-blocking commands although this variant could just as well use blocking #and not post the recieve until after it did the pre-check # Unlike normal MPI, irecv here takes a buffer size only and # the actual result is returned by the wait # First param is buffer size in bytes. request = comm.irecv(4, source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) #Do some work before waiting if precheck_num < precheck_to: precheck_flags(lower, length, flags, precheck_num) precheck_num = precheck_num + 1 result = request.wait(status=info) if info.tag > 0: # Capture stats end_time[info.source - 1] = time.time() cum_time[info.source - 1] = cum_time[info.source - 1] + ( end_time[info.source - 1] - start_time[info.source - 1]) processed[info.source - 1] = processed[info.source - 1] + 1 offset = vals_in_use[info.source - 1] - lower #Store result #Cheat - if prime mark as 2, (True + 1), else as composite, 1 (False+1) flags[int(offset)] = result + 1 inflight = inflight - 1 if current_val < length: #If there is still work to do, reply with next package #Skip any values that have already been checked i.e. are 1 or 2 #The precheck_flags routine may mark some things as composite while flags[current_val] != 0 and current_val < length: current_val = current_val + 1 vals_in_use[info.source - 1] = lower + current_val #print("Dispatching ", lower+current_val) start_time[info.source - 1] = time.time() current_val = current_val + 1 comm.send(vals_in_use[info.source - 1], dest=info.source, tag=1) inflight = inflight + 1 else: #No more work, shut down the worker comm.send(1, dest=info.source, tag=0) if inflight == 0: #Nothing is in flight, all done break #Summarize findings for i in range(0, nproc - 1): print("Worker ", i, " processed ", int(processed[i - 1]), " packets in ", cum_time[i - 1], "s") #Total the number of elements marked prime (==2) and divide by 2 to get number print("Found ", int(numpy.sum(flags[flags == 2]) / 2), " primes")
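# The matching worker for controller() above is not shown in this snippet.
# A minimal sketch of what it could look like under the protocol used there:
# the worker announces itself with tag 0, then answers each received value
# with an is-prime flag on tag 1 until the controller replies with tag 0.
# The trial-division test is a stand-in; the original worker is not included.
def worker(comm):
    status = MPI.Status()
    result = False
    tag = 0                      # first message only announces readiness
    while True:
        comm.send(result, dest=0, tag=tag)
        value = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        if status.Get_tag() == 0:        # controller says there is no more work
            break
        n = int(value)
        result = n > 1 and all(n % d for d in range(2, int(n ** 0.5) + 1))
        tag = 1                  # subsequent messages carry a result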
def run_mpi_alt_sim(args, inputfile, usernamespace, optparams=None): """ Alternate MPI implementation that avoids using the spawn mechanism. Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI # Define MPI message tags tags = Enum('tags', {'READY': 0, 'DONE': 1, 'EXIT': 2, 'START': 3}) # Initializations and preliminaries comm = MPI.COMM_WORLD size = comm.Get_size() # total number of processes rank = comm.Get_rank() # rank of this process status = MPI.Status() # get MPI status object hostname = MPI.Get_processor_name() # get name of processor/host # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n currentmodelrun = modelstart # can use -task argument to start numbering from something other than 1 numworkers = size - 1 ################## # Master process # ################## if rank == 0: tsimstart = perf_counter() print('MPI master (rank {}, PID {}) on {} using {} workers\n'.format( rank, os.getpid(), hostname, numworkers)) closedworkers = 0 while closedworkers < numworkers: data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) source = status.Get_source() tag = status.Get_tag() # Worker is ready, so send it a task if tag == tags.READY.value: if currentmodelrun < modelend: comm.send(currentmodelrun, dest=source, tag=tags.START.value) currentmodelrun += 1 else: comm.send(None, dest=source, tag=tags.EXIT.value) # Worker has completed a task elif tag == tags.DONE.value: pass # Worker has completed all tasks elif tag == tags.EXIT.value: closedworkers += 1 # Shutdown communicator comm.Disconnect() tsimend = perf_counter() simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) ################## # Worker process # ################## else: while True: comm.send(None, dest=0, tag=tags.READY.value) # Receive a model number to run from the master currentmodelrun = comm.recv(source=0, tag=MPI.ANY_TAG, status=status) tag = status.Get_tag() # Run a model if tag == tags.START.value: # Get info and setup device ID for GPU(s) gpuinfo = '' if args.gpu is not None: # Set device ID for multiple GPUs if isinstance(args.gpu, list): deviceID = (rank - 1) % len(args.gpu) args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID) gpuinfo = ' using {} - {}, {}'.format( args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True)) # If Taguchi optimistaion, add specific value for each parameter # to optimise for each experiment to user accessible namespace if optparams: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in optparams.items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model print('MPI worker (rank {}) starting model {}/{}{} on {}\n'. 
format(rank, currentmodelrun, numbermodelruns, gpuinfo, hostname)) run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) comm.send(None, dest=0, tag=tags.DONE.value) # Break out of loop when work receives exit message elif tag == tags.EXIT.value: break comm.send(None, dest=0, tag=tags.EXIT.value)
def __MPI_MAIN__(parser): comm = MPI.COMM_WORLD rank = comm.Get_rank() stat = MPI.Status() nproc = comm.Get_size() # Parent processor if rank == 0: print('STARTING RANK 0') # Profile code execution. prog_start_time = time.time() parse_args = parser.parse_args() # Initialize parameter values inputs_cfg = parse_args.inputs_cfg # Construct list of calls to run from shell. hotpotato = set_defaults(read_config(inputs_cfg)) # Create output directory if non-existent. if not os.path.isdir(hotpotato['OUTPUT_DIR']): os.makedirs(hotpotato['OUTPUT_DIR']) # Load information on single pulse candidates. metadata, cand_DMs, cand_sigma, cand_dedisp_times, cand_dedisp_samples, cand_downfact, select_indices = filter_spcands( hotpotato) if len(select_indices) == 0: print( 'No single pulse candidates fit the user-supplied selection criteria. Quitting program' ) sys.exit(1) # Read header of filterbank file. hdr = Header(hotpotato['DATA_DIR'] + '/' + hotpotato['fil_file'], file_type='filterbank') # Returns a Header object tot_time_samples = hdr.ntsamples # Total no. of time samples in entire dynamic spectrum. t_samp = hdr.t_samp # Sampling time (s) chan_bw = hdr.chan_bw # Channel bandwidth (MHz) nchans = hdr.nchans # No. of channels npol = hdr.npol # No. of polarizations n_bytes = hdr.primary['nbits'] / 8.0 # No. of bytes per data sample hdr_size = hdr.primary['hdr_size'] # Header size (bytes) # Set up frequency array. Frequencies in GHz. freqs_GHz = (hdr.fch1 + np.arange(nchans) * chan_bw) * 1e-3 print(hdr) # Flip frequency axis if chan_bw<0. if (chan_bw < 0): print('Channel bandwidth is negative.') freqs_GHz = np.flip(freqs_GHz) print('Frequencies rearranged in ascending order.') # Chop bandpass edges. hotpotato['ind_band_low'] = np.where( freqs_GHz >= hotpotato['freq_band_low'])[0][0] hotpotato['ind_band_high'] = np.where( freqs_GHz <= hotpotato['freq_band_high'])[0][-1] + 1 # Clip bandpass edges. freqs_GHz = freqs_GHz[ hotpotato['ind_band_low']:hotpotato['ind_band_high']] # Load median bandpass, if pre-computed. if hotpotato['bandpass_method'] == 'file': print('Loading median bandpass from %s' % (hotpotato['bandpass_npz'])) hotpotato['median_bp'] = np.load( hotpotato['BANDPASS_DIR'] + '/' + hotpotato['bandpass_npz'], allow_pickle=True)['Median bandpass'] hotpotato['median_bp'] = hotpotato['median_bp'][ hotpotato['ind_band_low']:hotpotato['ind_band_high']] print('Median bandpass loaded.') elif hotpotato['bandpass_method'] not in ['file', 'compute']: print('Unrecognized bandpass computation method.') sys.exit(1) # Load rfifind mask. print('Reading rfifind mask: %s' % (hotpotato['rfimask'])) nint, int_times, ptsperint, mask_zap_chans, mask_zap_ints, mask_zap_chans_per_int = read_rfimask( hotpotato['RFIMASK_DIR'] + '/' + hotpotato['rfimask']) mask_zap_chans, mask_zap_chans_per_int = modify_zapchans_bandpass( mask_zap_chans, mask_zap_chans_per_int, hotpotato['ind_band_low'], hotpotato['ind_band_high']) if nproc == 1: f = open(hotpotato['DATA_DIR'] + '/' + hotpotato['fil_file'], 'rb') for i in range(len(select_indices)): cand_index = select_indices[i] downfact = cand_downfact[cand_index] myexecute(cand_index, cand_DMs, cand_sigma, cand_dedisp_times, downfact, metadata, int_times, mask_zap_chans, mask_zap_chans_per_int, freqs_GHz, tot_time_samples, t_samp, chan_bw, npol, nchans, n_bytes, hdr_size, hotpotato, f, rank) f.close() else: # Distribute candidates evenly among child processors. indices_dist_list = np.array_split(select_indices, nproc - 1) # Send data to child processors. 
            for indx in range(1, nproc):
                comm.send((indices_dist_list[indx - 1], cand_DMs, cand_sigma,
                           cand_dedisp_times, cand_downfact, metadata, int_times,
                           mask_zap_chans, mask_zap_chans_per_int, freqs_GHz,
                           tot_time_samples, t_samp, chan_bw, npol, nchans,
                           n_bytes, hdr_size, hotpotato),
                          dest=indx, tag=indx)
            comm.Barrier()  # Wait for all child processors to receive sent call.
            # Receive data from child processors after execution.
            comm.Barrier()

        # Calculate total run time for the code.
        prog_end_time = time.time()
        run_time = (prog_end_time - prog_start_time) / 60.0
        print('Code run time = %.5f minutes' % (run_time))
        print('FINISHING RANK 0')

    else:
        # Receive data from parent processor.
        (indx_vals, cand_DMs, cand_sigma, cand_dedisp_times, cand_downfact,
         metadata, int_times, mask_zap_chans, mask_zap_chans_per_int, freqs_GHz,
         tot_time_samples, t_samp, chan_bw, npol, nchans, n_bytes, hdr_size,
         hotpotato) = comm.recv(source=0, tag=rank)
        comm.Barrier()
        print('STARTING RANK: ', rank)
        f = open(hotpotato['DATA_DIR'] + '/' + hotpotato['fil_file'], 'rb')
        for i in range(len(indx_vals)):
            cand_index = indx_vals[i]
            downfact = cand_downfact[cand_index]
            myexecute(cand_index, cand_DMs, cand_sigma, cand_dedisp_times,
                      downfact, metadata, int_times, mask_zap_chans,
                      mask_zap_chans_per_int, freqs_GHz, tot_time_samples,
                      t_samp, chan_bw, npol, nchans, n_bytes, hdr_size,
                      hotpotato, f, rank)
        f.close()
        print('FINISHING RANK: ', rank)
        comm.Barrier()
# Assumed preamble (the original fragment begins at the argument check below):
# imports and the slave count are reconstructed here so the fragment is runnable.
import sys
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
escravos = comm.Get_size() - 1   # number of slave ranks

if len(sys.argv) == 1:
    print("vector size")   # usage hint: pass the vector size as an argument
    exit()

if rank == 0:
    tamvet = int(sys.argv[1])
    vet = np.arange(tamvet, dtype=np.int32)   # int32 to match MPI.INT below (np.int is removed in recent NumPy)
    tamsubvet = tamvet // escravos
    resto = tamvet - tamsubvet * escravos
    for i in range(escravos):
        pos = i * tamsubvet
        if i == escravos - 1:
            comm.Send([vet[pos:], tamsubvet + resto, MPI.INT], i + 1)
        else:
            comm.Send([vet[pos:], tamsubvet, MPI.INT], i + 1)
        soma = comm.recv(source=i + 1)
        print("sum=", soma)
else:
    status = MPI.Status()   # used to get the size of the incoming vector
    comm.Probe(MPI.ANY_SOURCE, MPI.ANY_TAG, status)
    tamsubvet = status.Get_elements(MPI.INT)   # convert the number of vector elements to an int
    vet = np.empty(tamsubvet, dtype=np.int32)
    comm.Recv(vet)
    resultado = sum(vet)
    print("[%d] received:" % (rank), vet)
    comm.send(resultado, dest=0)
def _master_loop(self): log_debug(logger, "Master loop started") t_start = time.time() slices = numpy.zeros(self.comm.size, 'i') closed = numpy.zeros(self.comm.size, 'i') self._i = 0 while True: # Transfer data and metadata for numpy arrays that do not need to be pickled t0 = time.time() status = MPI.Status() l = self.comm.recv(source=MPI.ANY_SOURCE, tag=0, status=status) source = status.Get_source() t1 = time.time() t_wait = t1 - t0 if source == 0: logger.warning('Received write package from master process! Skipping writing.') print(l) continue if l == "close": closed[source] = 1 if closed.sum() == self.comm.size-1: break else: t0 = time.time() # Transfer data without pickling self._transfer_numpy_arrays(l, source=source) t1 = time.time() t_recv = t1 - t0 if "write_slice" in l: t0 = time.time() # WRITE SLICE TO FILE log_debug(logger, "Write slice to file") self._write_slice_master(l["write_slice"], i=self._i)#slices[source]*(self.comm.size-1)+source-1) self._i += 1 slices[source] += 1 # -- t1 = time.time() t_write = t1 - t0 log_info(logger, "Writing rate %.1f Hz; slice %i (writing %.4f sec; waiting %.4f sec, receiving %.4f sec)" % (slices.sum()/(time.time()-t_start),self._i-1,t_write,t_wait,t_recv)) if "write_solo" in l: # WRITE SOLO TO FILE log_debug(logger, "Write solo to file") self._write_solo_master(l["write_solo"]) # -- log_debug(logger, "Master writer is closing.") self._resize_stacks(self._i_max + 1) self._f.close() log_debug(logger, "File %s closed." % self._filename)
def _master(self): """Master node's operation. Assigning tasks to workers and collecting results from them Parameters ---------- None Returns ------- results: list of tuple (voxel_id, accuracy) the accuracy numbers of all voxels, in accuracy descending order the length of array equals the number of voxels """ logger.info('Master at rank %d starts to allocate tasks', MPI.COMM_WORLD.Get_rank()) results = [] comm = MPI.COMM_WORLD size = comm.Get_size() sending_voxels = self.voxel_unit if self.voxel_unit < self.num_voxels \ else self.num_voxels current_task = (0, sending_voxels) status = MPI.Status() # using_size is used when the number of tasks # is smaller than the number of workers using_size = size for i in range(0, size): if i == self.master_rank: continue if current_task[1] == 0: using_size = i break logger.debug('master starts to send a task to worker %d' % i) comm.send(current_task, dest=i, tag=self._WORKTAG) next_start = current_task[0] + current_task[1] sending_voxels = self.voxel_unit \ if self.voxel_unit < self.num_voxels - next_start \ else self.num_voxels - next_start current_task = (next_start, sending_voxels) while using_size == size: if current_task[1] == 0: break result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) results += result comm.send(current_task, dest=status.Get_source(), tag=self._WORKTAG) next_start = current_task[0] + current_task[1] sending_voxels = self.voxel_unit \ if self.voxel_unit < self.num_voxels - next_start \ else self.num_voxels - next_start current_task = (next_start, sending_voxels) for i in range(0, using_size): if i == self.master_rank: continue result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) results += result for i in range(0, size): if i == self.master_rank: continue comm.send(None, dest=i, tag=self._TERMINATETAG) return results
def start(self): """ Start the Manager process. The worker loops on this: 1. If the last message sent was older than heartbeat period we send a heartbeat 2. TODO: Move task receiving to a thread """ self.comm.Barrier() logger.debug("Manager synced with workers") self._kill_event = threading.Event() self._task_puller_thread = threading.Thread(target=self.pull_tasks, args=(self._kill_event,)) self._result_pusher_thread = threading.Thread(target=self.push_results, args=(self._kill_event,)) self._task_puller_thread.start() self._result_pusher_thread.start() start = None result_counter = 0 task_recv_counter = 0 task_sent_counter = 0 logger.info("Loop start") while not self._kill_event.is_set(): time.sleep(LOOP_SLOWDOWN) # In this block we attempt to probe MPI for a set amount of time, # and if we have exhausted all available MPI events, we move on # to the next block. The timer and counter trigger balance # fairness and responsiveness. timer = time.time() + 0.05 counter = min(10, comm.size) while time.time() < timer: info = MPI.Status() if counter > 10: logger.debug("Hit max mpi events per round") break if not self.comm.Iprobe(status=info): logger.debug("Timer expired, processed {} mpi events".format(counter)) break else: tag = info.Get_tag() logger.info("Message with tag {} received".format(tag)) counter += 1 if tag == RESULT_TAG: result = self.recv_result_from_workers() self.pending_result_queue.put(result) result_counter += 1 elif tag == TASK_REQUEST_TAG: worker_rank = self.recv_task_request_from_workers() self.ready_worker_queue.put(worker_rank) else: logger.error("Unknown tag {} - ignoring this message and continuing".format(tag)) available_worker_cnt = self.ready_worker_queue.qsize() available_task_cnt = self.pending_task_queue.qsize() logger.debug("[MAIN] Ready workers: {} Ready tasks: {}".format(available_worker_cnt, available_task_cnt)) this_round = min(available_worker_cnt, available_task_cnt) for i in range(this_round): worker_rank = self.ready_worker_queue.get() task = self.pending_task_queue.get() comm.send(task, dest=worker_rank, tag=worker_rank) task_sent_counter += 1 logger.debug("Assigning worker:{} task:{}".format(worker_rank, task['task_id'])) if not start: start = time.time() logger.debug("Tasks recvd:{} Tasks dispatched:{} Results recvd:{}".format( task_recv_counter, task_sent_counter, result_counter)) # print("[{}] Received: {}".format(self.identity, msg)) # time.sleep(random.randint(4,10)/10) self._task_puller_thread.join() self._result_pusher_thread.join() self.task_incoming.close() self.result_outgoing.close() self.context.term() delta = time.time() - start logger.info("mpi_worker_pool ran for {} seconds".format(delta))
def proc_work(rank): status = MPI.Status() bcast_msg = None bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD) n_datasets = bcast_msg print "".join(["WORKER ", str(rank), ": Received broadcast msg"]) base_start_yr = str(esd.cfg.start_date_baseline.year) base_end_yr = str(esd.cfg.end_date_baseline.year) train_start_yr = str(esd.cfg.start_date_train_downscale.year) train_end_yr = str(esd.cfg.end_date_train_downscale.year) ds_start_yr = str(esd.cfg.start_date_downscale.year) ds_end_yr = str(esd.cfg.end_date_downscale.year) downscale_wins = [('1976','2005'), ('2006','2039'), ('2040','2069'), ('2070','2099')] fpath_tair_obsc = os.path.join(esd.cfg.path_aphrodite_resample, 'aprhodite_redriver_sat_1961_2007_p25deg_remapbic.nc') fpath_prcp_obsc = os.path.join(esd.cfg.path_aphrodite_resample, 'aprhodite_redriver_pcp_1961_2007_p25deg_remapbic.nc') tair_d = TairDownscale(esd.cfg.fpath_aphrodite_tair, fpath_tair_obsc, base_start_yr, base_end_yr, train_start_yr, train_end_yr, downscale_wins) prcp_d = PrcpDownscale(esd.cfg.fpath_aphrodite_prcp, fpath_prcp_obsc, base_start_yr, base_end_yr, train_start_yr, train_end_yr, ds_start_yr, ds_end_yr) downscalers = {'tas': tair_d, 'pr': prcp_d} while 1: fpath_cmip5 = MPI.COMM_WORLD.recv(source=RANK_COORD, tag=MPI.ANY_TAG, status=status) if status.tag == TAG_STOPWORK: MPI.COMM_WORLD.send([None]*3, dest=RANK_WRITE, tag=TAG_STOPWORK) print "".join(["WORKER ", str(rank), ": Finished"]) return 0 else: print "WORKER %d: Processing %s..."%(rank, fpath_cmip5) ds = xr.open_dataset(fpath_cmip5, decode_cf=False) vname = ds.data_vars.keys()[0] ds[vname].attrs.pop('missing_value') ds = xr.decode_cf(ds) da = ds[vname].load() da_ds = downscalers[vname].downscale(da) # Add metadata and create dataset da_ds.attrs = da.attrs ds_out = da_ds.to_dataset(name=vname) ds_out.attrs = ds.attrs ds_out.attrs['comment'] = NC_COMMENT_ATTR subdir = os.path.split(os.path.split(fpath_cmip5)[0])[-1] fname = os.path.basename(fpath_cmip5) MPI.COMM_WORLD.send((subdir, fname, ds_out), dest=RANK_WRITE, tag=TAG_DOWORK) MPI.COMM_WORLD.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def do_steps(self, n_steps, non_block=True, when_update=10): """ Steps through generations :param n_steps: number of generations through which to step :param non_block: boolean to determine blocking or non :param when_update: how often each rank updates in non blocking """ t_0 = time.time() if non_block: # totalAge to hold rank:age, averageAge and target_age to # hold when to stop, when_update for when to send/receive data total_age = {} average_age = self.age target_age = self.age + n_steps while average_age < target_age: if self.isle.solution_island.age % when_update == 0: if self.comm_rank == 0: # update the age in totalAge for self total_age.update({0:self.isle.solution_island.age}) # while there is data from any rank, receive until # last, and add the data to totalAge # TODO (gbomarito) could get flooded and never exit loop status = MPI.Status() while self.comm.iprobe(source=MPI.ANY_SOURCE, tag=2, status=status): data = self.comm.recv(source=status.Get_source(), tag=2) total_age.update(data) average_age = (sum(total_age.values())) / self.comm.size # send average to all other ranks if time to stop if average_age >= n_steps: scount = 1 while scount < self.comm_size: self.comm.send(average_age, dest=scount, tag=0) scount += 1 # for every other rank, store rank:age, and send it off to 0 else: data = {self.comm_rank:self.isle.solution_island.age} req = self.comm.isend(data, dest=0, tag=2) req.Wait() # if there is a message from 0 to stop, update averageAge if self.comm_rank != 0: if self.comm.iprobe(source=0, tag=0): average_age = self.comm.recv(source=0, tag=0) self.isle.generational_step() # print_pareto(isle.solution_island.pareto_front, "front.png") else: for _ in range(n_steps): self.isle.generational_step() t_1 = time.time() LOGGER.info("%2d >\tage: %d\ttime: %.1fs\tbest fitness: %s", self.comm_rank, self.isle.solution_island.age, t_1 - t_0, self.isle.solution_island.pareto_front[0].fitness) if non_block: # perform message cleanup before moving on self.comm.Barrier() if self.comm_rank == 0: status = MPI.Status() while self.comm.iprobe(source=MPI.ANY_SOURCE, tag=2, status=status): data = self.comm.recv(source=status.Get_source(), tag=2) if np.isnan(self.isle.solution_island.pareto_front[0].fitness[0]): for i in self.isle.solution_island.pop: LOGGER.error(str(i.fitness)) for indv in self.isle.solution_island.pareto_front: LOGGER.error("pareto > %s %s", str(indv.fitness), indv.latexstring()) self.age += n_steps
def setUp(self):
    self.REQUESTS = [MPI.Request() for i in range(5)]
    self.STATUSES = [MPI.Status() for i in range(5)]
def hashCheck(granule_list): # Now for the fun part. All of our files have been supposedly downloaded by Globus. We need to hash them # and compare them with the saved hash files. Split the files evenly amongst all the MPI ranks. # We use mpi_size - 1 because the master rank is not considered a slave. logger.info("Submitting the files for hashing.") hashing = [] # List of lines currently being hashed succeedHash = [] # List of lines that passed the hash check failedHash = [] # List of lines that failed the hash check numFiles = len(granule_list) # Make a copy of granule_list. We want to delete elements in the list as they're successfully processed. newGranuleList = granule_list[:] num_orig = len(newGranuleList) # We will send the files to the slaves one-by-one. We define some global tags for the master-slave communication # so that they can effectively converse with each other. while len(newGranuleList) > 0 or len(hashing) > 0: # Check for slaves that want a job stat = MPI.Status() # Class that keeps track of MPI information # No more jobs to send. Only accept hash messages if len(newGranuleList) == 0: mpi_comm.probe(tag=MPI_SLAVE_HASH_COMPLETE, status=stat) # Else, accept messages from any tag else: mpi_comm.probe(status=stat) # A slave wants a job and we have more jobs to send. if stat.Get_tag() == MPI_SLAVE_IDLE: granule = newGranuleList.pop() # Grab the orbit from the transferLine for logging purposes logger.debug("Sending slave {} hash job: {}.".format( stat.source, granule.orbit)) # There have been issues with this script causing network congestion. NCSA suspects it is # the hashing that does it. If global variable is set to true if STAGGER_HASH_JOBS: time.sleep(0.01) mpi_comm.recv(source=stat.source, tag=MPI_SLAVE_IDLE) mpi_comm.send((mpi_rank_hash, granule), dest=stat.source) # Save this line in the hashing dict to keep track of what is currently being hashed hashing.append(granule) # Slave has finished hashing a file. Check its status. elif stat.Get_tag() == MPI_SLAVE_HASH_COMPLETE: # worker will send back the granule it was given recvGranule = mpi_comm.recv(source=stat.source, tag=stat.Get_tag()) if recvGranule.hash_status == MPI_SLAVE_HASH_FAIL: # Remove this line from the hashing list found_elem = 0 for i in hashing: if i.orbit == recvGranule.orbit: hashing.remove(i) found_elem = 1 if found_elem == 0: raise RuntimeError( "Failed to find proper element to remove in list.") # Append to the list of failed hashes failedHash.append(recvGranule) elif recvGranule.hash_status == MPI_SLAVE_HASH_SUCCEED: # Remove this line from the hashing list found_elem = 0 for i in hashing: if i.orbit == recvGranule.orbit: hashing.remove(i) found_elem = 1 if found_elem == 0: raise RuntimeError( "Failed to find proper element to remove in list.") # Append to the list of successful hashes succeedHash.append(recvGranule) else: raise ValueError( "Slave gave invalid number {} for hash status.".format( recvGranule.hash_status)) logger.info("{} hashes failed, {} hashes successful.".format( len(failedHash), len(succeedHash))) # Do a sanity check to make sure no files were lost in this process if num_orig != len(failedHash) + len(succeedHash): errMsg = "num_orig ({}) does not equal failed + succeed ({}) hashes!".format( num_orig, len(failedHash) + len(succeedHash)) logger.error(errMsg) raise ValueError(errMsg) return failedHash
def setUp(self):
    self.REQUEST = MPI.Request()
    self.STATUS = MPI.Status()
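# Hedged example of the kind of assertion such a fixture typically supports
# (an illustration, not the original test body): a freshly constructed Status
# normally reports MPI.ANY_SOURCE / MPI.ANY_TAG and no error before it has
# been filled in by a receive or probe.
def testDefaultFieldValues(self):
    self.assertEqual(self.STATUS.Get_source(), MPI.ANY_SOURCE)
    self.assertEqual(self.STATUS.Get_tag(), MPI.ANY_TAG)
    self.assertEqual(self.STATUS.Get_error(), MPI.SUCCESS)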
def masterTask(Dataset, world):
    """ Define a Send Recv Send procedure on the master """

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI

    # Set the total number of data points
    nPoints = Dataset.nPoints

    nFinished = 0
    nSent = 0
    # continueRunning = np.empty(1, dtype=np.int32)
    # rankRecv = np.zeros(3, dtype=np.float64)

    # Send out the first indices to the workers
    for iWorker in range(1, world.size):
        # Get a datapoint from the file.
        DataPoint = Dataset._readSingleDatapoint()

        # If DataPoint is None, then we reached the end of the file and no
        # more points can be read in.
        if DataPoint is None:
            # Send the kill switch to the worker to shut down.
            # continueRunning[0] = 0  # Do not continue running
            continueRunning = False
            world.send(continueRunning, dest=iWorker)
        else:
            # continueRunning[0] = 1  # Yes, continue with the next point.
            continueRunning = True
            world.send(continueRunning, dest=iWorker)
            DataPoint.Isend(dest=iWorker, world=world)
            nSent += 1

    # Start a timer
    t0 = MPI.Wtime()

    myMPI.print("Initial data points sent. Master is now waiting for requests")

    # Now wait to send indices out to the workers as they finish until the
    # entire data set is finished
    while nFinished < nPoints:
        # Wait for a worker to request the next data point
        status = MPI.Status()
        dummy = world.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                           status=status)
        requestingRank = status.Get_source()
        # requestingRank = np.int(rankRecv[0])
        # dataPointProcessed = rankRecv[1]

        nFinished += 1

        # Read the next data point from the file
        DataPoint = Dataset._readSingleDatapoint()

        # If DataPoint is None, then we reached the end of the file and no
        # more points can be read in.
        if DataPoint is None:
            # Send the kill switch to the worker to shut down.
            # continueRunning[0] = 0  # Do not continue running
            continueRunning = False
            world.send(continueRunning, dest=requestingRank)
        else:
            # continueRunning[0] = 1  # Yes, continue with the next point.
            continueRunning = True
            world.send(continueRunning, dest=requestingRank)
            DataPoint.Isend(dest=requestingRank, world=world,
                            systems=DataPoint.system)

        report = (nFinished % (world.size - 1)) == 0 or nFinished == nPoints

        if report:
            e = MPI.Wtime() - t0
            elapsed = str(timedelta(seconds=e))
            eta = str(timedelta(seconds=(nPoints / nFinished - 1) * e))
            myMPI.print(
                "Remaining Points {}/{} || Elapsed Time: {} h:m:s || ETA {} h:m:s"
                .format(nPoints - nFinished, nPoints, elapsed, eta))
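# Hedged sketch of the worker loop the master above is driving. This is an
# assumption inferred from the send pattern, not geobipy's actual worker code,
# and the matching receive for DataPoint.Isend is deliberately omitted because
# that part of the geobipy API is not shown above.
def workerTask(Dataset, world):
    while True:
        # the master first sends the continueRunning flag ...
        continueRunning = world.recv(source=0)
        if not continueRunning:
            break  # kill switch: no more data points to process
        # ... then the data point itself (receive matching DataPoint.Isend
        # goes here), after which the worker processes the point and asks
        # the master for the next one with a plain send
        world.send(world.rank, dest=0)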
def map(self, function, tasks, ntask=None, callback=None):
    """
    Like the built-in :func:`map` function, apply a function to all
    of the values in a list and return the list of results.

    Parameters
    ----------
    function : callable
        The function to apply to each element in the list.
    tasks :
        A list of tasks -- each element is passed to the input function.
    callback : callable (optional)
        A callback function to call on each result.
    """
    from mpi4py import MPI

    if ntask is None:
        ntask = len(tasks)

    # If not the master just wait for instructions.
    if not self.is_master():
        self.wait()
        return

    if function is not self.function:
        if self.debug:
            print(f"Master replacing pool function with {function}.")
        self.function = function

        F = _function_wrapper(function)

        # Tell all the workers what function to use.
        requests = []
        for i in range(self.size):
            r = self.comm.isend(F, dest=i + 1)
            requests.append(r)

        # Wait until all of the workers have responded. See:
        # https://gist.github.com/4176241
        MPI.Request.waitall(requests)

    if (not self.loadbalance) or (ntask <= self.size):
        # Do not perform load-balancing - the default load-balancing
        # scheme emcee uses.

        # Send all the tasks off and wait for them to be received.
        # Again, see the bug in the above gist.
        requests = []
        for i, task in enumerate(tasks):
            worker = i % self.size + 1
            if self.debug:
                print(f"Sent task {task} to worker {worker} with tag {i}.")
            r = self.comm.isend(task, dest=worker, tag=i)
            requests.append(r)

        MPI.Request.waitall(requests)

        # Now wait for the answers.
        results = []
        for i in range(ntask):
            worker = i % self.size + 1
            if self.debug:
                print(f"Master waiting for worker {worker} with tag {i}")
            result = self.comm.recv(source=worker, tag=i)
            if callback is not None:
                callback(result)
            results.append(result)

        return results

    else:
        # Perform load-balancing. The order of the results are likely to
        # be different from the previous case.
        for i, task in enumerate(tasks[0:self.size]):
            worker = i + 1
            if self.debug:
                print(f"Sent task {task} to worker {worker} with tag {i}.")
            # Send out the tasks asynchronously.
            self.comm.isend(task, dest=worker, tag=i)

        ntasks_dispatched = self.size
        results = [None] * ntask
        for itask in range(ntask):
            status = MPI.Status()
            # Receive input from workers.
            result = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                                    status=status)
            worker = status.source
            i = status.tag
            if callback is not None:
                callback(result)
            results[i] = result
            if self.debug:
                print(f"Master received from worker {worker} with tag {i}")

            # Now send the next task to this idle worker (if there are any
            # left).
            if ntasks_dispatched < ntask:
                task = tasks[ntasks_dispatched]
                i = ntasks_dispatched
                if self.debug:
                    print(f"Sent task {task} to worker {worker} with tag {i}.")
                # Send out the tasks asynchronously.
                self.comm.isend(task, dest=worker, tag=i)
                ntasks_dispatched += 1

        return results
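# Hedged usage sketch for the map() above, assuming the surrounding class is a
# pool in the usual MPIPool style with is_master()/wait()/close(); the
# `MPIPool()` constructor name here is an assumption, not taken from the
# snippet. Run under `mpiexec -np N ...`: workers park in wait(), the master
# farms out the tasks and collects the results.
import sys

def square(x):
    return x * x

pool = MPIPool()              # hypothetical constructor for the pool class
if not pool.is_master():
    pool.wait()               # workers block here until the master sends work
    sys.exit(0)
results = pool.map(square, range(100))
pool.close()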
def start_runs(self, execute_program_rule, n_procs, *args, **kwargs):
    if self.use_mpi:
        # master
        if self.get_rank(self.use_mpi) == 0:
            master_thread = MasterSlaveThread(self, execute_program_rule,
                                              self.get_parameters(),
                                              *args, **kwargs)

            # seed slaves
            for proc in range(1, comm.size):
                self.dump_config_files(proc)
                comm.send(self.get_parameters(), dest=proc, tag=proc)

            master_thread.start()

            nodes_done = 0
            status = MPI.Status()
            while nodes_done != comm.size - 1:
                success = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                                    status=status)

                if success == self.node_finished:
                    nodes_done += 1
                    continue
                elif success != self.job_success:
                    self.stop(status.Get_source(), success)

                comm.send(self.get_parameters_thread(),
                          dest=status.Get_source(), tag=status.Get_tag())

            master_thread.join()

        # slave
        else:
            parameters = comm.recv(source=0, tag=comm.rank)

            while parameters is not None:
                success = self.run_parameters(execute_program_rule,
                                              parameters, comm.rank,
                                              *args, **kwargs)
                comm.send(success, dest=0, tag=comm.rank)
                parameters = comm.recv(source=0, tag=comm.rank)

            comm.send(self.node_finished, dest=0, tag=comm.rank)

        comm.Barrier()

    else:
        self.all_threads = []
        self.stopped = False

        for proc in range(n_procs):
            self.dump_config_files(proc)
            thread = Worker(self, execute_program_rule, proc, *args, **kwargs)
            self.all_threads.append(thread)

        for thread in self.all_threads:
            thread.start()

        for thread in self.all_threads:
            thread.join()
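# Minimal self-contained sketch of the sentinel handshake the master/slave
# exchange above relies on (a reduction for illustration, not the original
# classes): the master answers each result with either new work or None, and
# None makes the slave stop and report that it is finished before all ranks
# meet at the barrier. Run under `mpiexec -np N python sketch.py`.
from mpi4py import MPI

comm = MPI.COMM_WORLD
if comm.rank == 0:
    work = list(range(10))
    finished = 0
    # seed every slave with one item (or None if there is nothing to do)
    for proc in range(1, comm.size):
        comm.send(work.pop() if work else None, dest=proc, tag=proc)
    status = MPI.Status()
    while finished < comm.size - 1:
        msg = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        if msg == 'done':
            finished += 1
            continue
        # reply with the next item, or None as the stop sentinel
        comm.send(work.pop() if work else None,
                  dest=status.Get_source(), tag=status.Get_tag())
else:
    item = comm.recv(source=0, tag=comm.rank)
    while item is not None:
        comm.send(item * item, dest=0, tag=comm.rank)   # placeholder "run"
        item = comm.recv(source=0, tag=comm.rank)
    comm.send('done', dest=0, tag=comm.rank)
comm.Barrier()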
def enum(*sequential, **named):
    """Handy way to fake an enumerated type in Python
    http://stackoverflow.com/questions/36932/how-can-i-represent-an-enum-in-python
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


# Define MPI message tags
tags = enum('READY', 'DONE', 'EXIT', 'START')

# Initializations and preliminaries
comm = MPI.COMM_WORLD   # get MPI communicator object
size = comm.size        # total number of processes
rank = comm.rank        # rank of this process
status = MPI.Status()   # get MPI status object

if rank == 0:
    # Master process executes code below
    tasks = range(2 * size)
    task_index = 0
    num_workers = size - 1
    closed_workers = 0
    print("Master starting with %d workers" % num_workers)
    while closed_workers < num_workers:
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source = status.Get_source()
        tag = status.Get_tag()
        if tag == tags.READY:
            # Worker is ready, so send it a task
            if task_index < len(tasks):
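# The snippet above is cut off inside the READY branch. What follows is a
# hedged sketch of how this standard tags-based master/worker loop typically
# continues, not the original text; in particular the worker payload
# `task ** 2` is an arbitrary placeholder.
                comm.send(tasks[task_index], dest=source, tag=tags.START)
                task_index += 1
            else:
                # no tasks left, tell this worker to shut down
                comm.send(None, dest=source, tag=tags.EXIT)
        elif tag == tags.DONE:
            print("Got data from worker %d" % source)
        elif tag == tags.EXIT:
            closed_workers += 1
else:
    # Worker process: ask for work until told to exit
    while True:
        comm.send(None, dest=0, tag=tags.READY)
        task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()
        if tag == tags.START:
            result = task ** 2              # placeholder workload
            comm.send(result, dest=0, tag=tags.DONE)
        elif tag == tags.EXIT:
            break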
def alpso(dimensions, constraints, neqcons, xtype, x0, xmin, xmax, swarmsize, nhn, nhm, maxOutIter, maxInnIter, minInnIter, stopCriteria, stopIters, etol, itol, rtol, atol, dtol, prtOutIter, prtInnIter, r0, vinit, vmax, c1, c2, w1, w2, ns, nf, vcrazy, fileout, filename, logfile, hstfile, rseed, scale, nhs, objfunc): """Python Version of the Augmented Lagrangian Particle Swarm Optimizer Documentation last updated: April. 29, 2008 - Ruben E. Perez """ # MPI Setup comm = MPI.COMM_WORLD nproc = comm.Get_size() myrank = comm.Get_rank() status = MPI.Status() if mpi4py.__version__[0] == '0': Barrier = comm.Barrier Send = comm.Send Recv = comm.Recv Bcast = comm.Bcast # elif mpi4py.__version__[0] == '1': else: # version can be 1, 2, 3 .... or more Barrier = comm.barrier Send = comm.send Recv = comm.recv Bcast = comm.bcast master = 0 slave_sent_tag = 1 master_sent_tag = 2 break_tag = 3 if myrank != master: prtOutIter = 0 prtInnIter = 0 fileout = 0 if x0 != []: if isinstance(x0, list): x0 = numpy.array(x0) elif not isinstance(x0, numpy.ndarray): if myrank == master: print("Warning: Initial x must be either list or numpy.array, " "all initial positions randomly generated") else: pass if hstfile is not None: h_start = True else: h_start = False if logfile is not None: sto_hst = True else: sto_hst = False h_start = Bcast(h_start, root=0) # Set random number seed rand = random.Random() if rseed == {}: rseed = time.time() rseed = Bcast(rseed, root=0) rand.seed(rseed) if filename == '': filename = 'ALPSO.out' ofname = '' sfname = '' fntmp = filename.split('.') if len(fntmp) == 1: ofname += fntmp[0] + '_print.out' sfname += fntmp[0] + '_summary.out' else: if '/' not in fntmp[-1] and '\\' not in fntmp[-1]: ofname += filename[:filename.rfind('.')] + '_print.' + fntmp[-1] sfname += filename[:filename.rfind('.')] + '_summary.' 
+ fntmp[-1] else: ofname += filename + '_print.out' sfname += filename + '_summary.out' header = '' header += ' ' * 37 + '======================\n' header += ' ' * 39 + ' ALPSO 1.1 (DPM)\n' header += ' ' * 37 + '======================\n\n' header += 'Parameters:\n' header += '-' * 97 + '\n' if maxInnIter != minInnIter: diI = 1 else: diI = 0 if x0 != []: if len(x0.shape) == 1: nxi = 1 else: nxi = x0.shape[0] else: nxi = 0 header += 'Swarmsize :%9d' % swarmsize + \ ' MaxOuterIters :%9d' % maxOutIter + \ ' Seed:%26.8f\n' % rseed header += 'Cognitive Parameter :%9.3f' % c1 + \ ' MaxInnerIters :%9d' % maxInnIter + \ ' Scaling :%11d\n' % scale header += 'Social Parameter :%9.3f' % c2 + \ ' MinInnerIters :%9d' % minInnIter + \ ' Stopping Criteria :%11d\n' % stopCriteria header += 'Initial Weight :%9.3f' % w1 + \ ' DynInnerIters :%9d' % diI + \ ' Number of Failures :%11d\n' % ns header += 'Final Weight :%9.3f' % w2 + \ ' StoppingIters :%9d' % stopIters + \ ' Number of Successes:%11d\n\n' % nf header += 'Absolute Tolerance : %1.2e' % atol + \ ' Number Initial Pos:%9d' % nxi + \ ' Neighbourhood Model:%11s\n' % nhm header += 'Relative Tolerance : %1.2e' % rtol + \ ' Initial Velocity :%9d' % vinit + \ ' Neighbourhood Size :%11d\n' % nhn header += 'Inequality Tolerance: %1.2e' % itol + \ ' Maximum Velocity :%9d' % vmax + \ ' Selfless :%11d\n' % nhs header += 'Equality Tolerance : %1.2e' % etol + \ ' Craziness Velocity: %1.2e' % vcrazy + \ ' Fileout :%11d\n' % fileout header += 'Global Distance : %1.2e' % dtol + \ ' Initial Penalty :%9.2f' % r0 + \ ' File Name :%11s\n' % filename header += '-' * 97 + '\n\n' if (fileout == 1) or (fileout == 3): if os.path.isfile(ofname): os.remove(ofname) ofile = open(ofname, 'w') ofile.write(header) if (fileout == 2) or (fileout == 3): if os.path.isfile(sfname): os.remove(sfname) sfile = open(sfname, 'w') sfile.write(header) dt = 1.0 vlimit = vmax vmax = numpy.ones(dimensions, float) * vmax if scale == 1: space_centre = numpy.zeros(dimensions, float) space_halflen = numpy.zeros(dimensions, float) for j in range(dimensions): space_centre[j] = (xmin[j] + xmax[j]) / 2.0 space_halflen[j] = ((xmax[j] - xmin[j]) / 2.0) xmin = -numpy.ones(dimensions, float) xmax = numpy.ones(dimensions, float) else: for j in range(dimensions): vmax[j] = ((xmax[j] - xmin[j]) / 2.0) * vlimit # Initialize the positions and velocities for entire population x_k = numpy.zeros((swarmsize, dimensions), float) v_k = numpy.zeros((swarmsize, dimensions), float) discrete_i = [] for i in range(swarmsize): for j in range(dimensions): x_k[i, j] = xmin[j] + rand.random() * (xmax[j] - xmin[j]) if xtype[j] == 1: discrete_i.append(j) v_k[i, j] = (xmin[j] + rand.random() * (xmax[j] - xmin[j])) / dt if x0 != []: if len(x0.shape) == 1: if scale == 1: x_k[0, :] = (x0[:] - space_centre) / space_halflen else: x_k[0, :] = x0[:] else: if x0.shape[0] > swarmsize: if myrank == master: print('Warning: %d initial positions specified ' 'for %d particles, last %d positions ignored' % (x0.shape[0], swarmsize, x0.shape[0] - swarmsize)) x0 = x0[0:swarmsize, :] for i in range(x0.shape[0]): if scale == 1: x_k[i, :] = (x0[i, :] - space_centre) / space_halflen else: x_k[i, :] = x0[i, :] # Initialize Augmented Lagrange f = numpy.zeros(swarmsize, float) L = numpy.zeros(swarmsize, float) g = numpy.zeros([swarmsize, constraints], float) g_old = numpy.zeros([swarmsize, constraints], float) rp = numpy.ones(constraints, float) * r0 lambda_val = numpy.zeros(constraints, float) lambda_old = numpy.zeros(constraints, float) tau = 
numpy.zeros([swarmsize, constraints], float) tau_new = numpy.zeros(constraints, float) tau_old = numpy.zeros(constraints, float) nfevals = 0 if h_start: if myrank == master: [vals, hist_end] = hstfile.read([], ident=['obj', 'con']) if not hist_end: f = vals['obj'][0] g = vals['con'][0].reshape(g.shape) else: h_start = False hstfile.close() h_start = Bcast(h_start, root=0) if not h_start: ## MPI Objective Function Evaluation Barrier() if myrank == master: # Send first round of particles p_ind = 0 for proc in range(1, nproc): if scale == 1: xtmp = (x_k[p_ind, :] * space_halflen) + space_centre else: xtmp = x_k[p_ind, :] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) sendbuf = [p_ind, xtmp] Send(sendbuf, proc, master_sent_tag) p_ind += 1 p_ind -= 1 # Master loop master_stop_flag = False while not master_stop_flag: recvbuf = Recv(None, MPI.ANY_SOURCE, slave_sent_tag, status) if status.tag is slave_sent_tag: f[recvbuf[0]] = recvbuf[1] g[recvbuf[0], :] = recvbuf[2][:] if p_ind == (swarmsize + nproc - 3): master_stop_flag = True # Increment the particle index p_ind += 1 # Send next particle if p_ind < swarmsize: if scale == 1: xtmp = (x_k[p_ind, :] * space_halflen) + space_centre else: xtmp = x_k[p_ind, :] sendbuf = [p_ind, xtmp] Send(sendbuf, status.source, master_sent_tag) # Send break command for proc in range(1, nproc): Send([-1], proc, break_tag) else: # Slave Evaluation Loop while 1: recvbuf = Recv(None, master, MPI.ANY_TAG, status) if status.tag == break_tag: break elif status.tag == master_sent_tag: [ff, gg] = objfunc(recvbuf[1]) # Send Results to Master sendbuf = [recvbuf[0], ff, gg] Send(sendbuf, master, slave_sent_tag) Barrier() nfevals += swarmsize for i in range(swarmsize): # Augmented Lagrangian Value L[i] = f[i] if constraints > 0: # Equality Constraints for l in range(neqcons): tau[i, l] = g[i, l] # Inequality Constraints for l in range(neqcons, constraints): if rp[l] != 0: if g[i, l] > -lambda_val[l] / (2 * rp[l]): tau[i, l] = g[i, l] else: tau[i, l] = -lambda_val[l] / (2 * rp[l]) else: tau[i, l] = g[i, l] for l in range(constraints): L[i] += lambda_val[l] * tau[i, l] + rp[l] * tau[i, l]**2 # Initialize Particles Best best_x = numpy.zeros((swarmsize, dimensions)) best_L = numpy.zeros(swarmsize, float) best_f = numpy.zeros(swarmsize, float) best_g = numpy.zeros([swarmsize, constraints], float) for i in range(swarmsize): for j in range(dimensions): best_x[i, j] = x_k[i, j] best_L[i] = L[i] best_f[i] = f[i] for l in range(constraints): best_g[i, l] = g[i, l] # Initialize Swarm Best swarm_i = L.argmin() swarm_i_old = 0 swarm_x = numpy.zeros(dimensions, float) for j in range(dimensions): swarm_x[j] = x_k[swarm_i, j] swarm_L = L[swarm_i] swarm_L_old = L[0] swarm_f = f[swarm_i] swarm_f_old = f[0] swarm_g = numpy.zeros(constraints, float) swarm_g_old = numpy.zeros(constraints, float) for l in range(constraints): swarm_g[l] = g[swarm_i, l] swarm_g_old[l] = g[0, l] # Initialize Neighbourhood if (nhm == 'dlring') or (nhm == 'slring') or (nhm == 'wheel') or ( nhm == 'spatial') or (nhm == 'sfrac'): nhps = [] nhbest_L = numpy.ones(swarmsize) * inf nhbest_f = numpy.zeros(swarmsize) nhbest_x = numpy.zeros((swarmsize, dimensions)) nhbest_i = numpy.zeros(swarmsize) if nhm == 'dlring': for i in range(swarmsize): nhps.append([]) if nhs == 0: nhps[i].append(i) for nb in range(1, (nhn / 2) + 1): if i + nb >= swarmsize: nhps[i].append(-1 + nb) else: nhps[i].append(i + nb) if i - nb < 0: nhps[i].append(swarmsize + i - nb) else: nhps[i].append(i - nb) elif nhm == 'slring': for i in range(swarmsize): 
nhps.append([]) if nhs == 0: nhps[i].append(i) for nb in range(1, (nhn / 2) + 1): if i + nb >= swarmsize: nhps[i].append(-1 + nb) else: nhps[i].append(i + nb) if i - (nb * 2) < 0: nhps[i].append(swarmsize + i - (nb * 2)) else: nhps[i].append(i - (nb * 2)) elif nhm == 'wheel': nhps.append([]) nhps[0].append(0) for i in range(1, swarmsize): nhps.append([]) nhps[i].append(i) nhps[i].append(0) nhps[0].append(i) elif nhm == 'spatial': pdist = numpy.ones((swarmsize, swarmsize)) * inf for i in range(swarmsize): for i2 in range(i + 1, swarmsize): pdist[i, i2] = numpy.linalg.norm(x_k[i2, :] - x_k[i, :]) for i2 in range(i): pdist[i, i2] = pdist[i2, i] for i in range(swarmsize): nhps.append([]) for nb in range(nhn): nhps[i].append(pdist[i, :].argmin()) pdist[i, nhps[i][nb]] = inf if nhs == 0: nhps[i].append(i) elif nhm == 'sfrac': pdist = numpy.zeros((swarmsize, swarmsize)) d_max = numpy.zeros(swarmsize) frac = 0.6 for i in range(swarmsize): for i2 in range(i + 1, swarmsize): pdist[i, i2] = numpy.linalg.norm(x_k[i2, :] - x_k[i, :]) for i2 in range(i): pdist[i, i2] = pdist[i2, i] for i in range(swarmsize): nhps.append([]) d_max[i] = pdist[i, :].max() for i2 in range(swarmsize): if i == i2: if nhs == 1: pass else: nhps[i].append(i) else: if (pdist[i, i2] / d_max[i] < frac): nhps[i].append(i2) # Initialize Neighbourhood Best for i in range(swarmsize): for nbp in nhps[i]: if (L[nbp] < nhbest_L[i]): nhbest_L[i] = L[nbp] nhbest_f[i] = f[nbp] nhbest_x[i, :] = x_k[nbp, :] nhbest_i[i] = nbp # Initialize stopping criteria distances global_dist = 0 for i in range(swarmsize): dist = 0 for j in range(dimensions): dist += (x_k[i, j] - swarm_x[j])**2 global_dist += dist**0.5 # relative extent of the swarm global_distance_reference = global_dist / swarmsize global_distance = numpy.zeros(stopIters, float) global_L = numpy.zeros(stopIters, float) for k in range(stopIters): global_distance[k] = global_distance_reference global_L[k] = swarm_L # Store History if sto_hst: logfile.write(rseed, 'seed') if scale == 1: x_uns = numpy.zeros(x_k.shape) for i in range(swarmsize): x_uns[i, :] = (x_k[i, :] * space_halflen) + space_centre else: x_uns = x_k if discrete_i != []: for i in range(swarmsize): for m in discrete_i: x_uns[i, m] = floor(x_uns[i, m] + 0.5) logfile.write(x_uns, 'x') logfile.write(f, 'obj') logfile.write(g, 'con') logfile.write(swarm_x, 'gbest_x') logfile.write(swarm_f, 'gbest_f') logfile.write(swarm_g, 'gbest_g') # Output to Summary File if (fileout == 2) or (fileout == 3): stext = '' stext += 'Global Best Particle:\n' stext += '-' * 94 + '\n' stext += ' Major Minor nFCon Violation(L2) Objective Lagrangian Rel Lagrangian Global Dist\n' stext += '-' * 94 + '\n' sfile.write(stext) sfile.flush() # Outer optimization loop k_out = 0 stop_main_flag = 0 no_successes = 0 no_failures = 0 rho = 1.0 vcr = 0.0 while (k_out < maxOutIter) and (stop_main_flag == 0): k_out += 1 # Update g_old Major Iteration for i in range(swarmsize): g_old[i, :] = g[i, :] # Inner optimization loop - # core ALPSO algorithm applied to the lagrangian function k_inn = 0 stop_inner = 0 while (k_inn < maxInnIter) and (stop_inner == 0): k_inn += 1 # calculating new search radius for the best particle # ("Guaranteed Convergence" method) if (swarm_i == swarm_i_old) and (swarm_L >= swarm_L_old): no_failures += 1 no_successes = 0 elif (swarm_i == swarm_i_old) and (swarm_L < swarm_L_old): no_successes += 1 no_failures = 0 else: no_successes = 0 no_failures = 0 if no_successes > ns: rho *= 2.0 no_successes = 0 elif no_failures > nf: rho *= 0.5 no_failures 
= 0 if rho < 10e-5: rho = 10e-5 elif rho > 1.0: rho = 1.0 # memorization for next outer iteration if k_inn == 1: swarm_i_old = swarm_i swarm_L_old = swarm_L swarm_f_old = swarm_f swarm_g_old[:] = swarm_g[:] # stopping criteria distances global_dist = 0 for i in range(swarmsize): dist = 0 for j in range(dimensions): dist += (x_k[i, j] - swarm_x[j])**2 global_dist += (dist)**0.5 global_distance[ 0] = global_dist / swarmsize # relative extent of the swarm # Update inertia weight w = w2 + ( (w2 - w1) / global_distance_reference) * global_distance[1] if w > w1: w = w1 elif w < w2: w = w2 # Swarm Update for i in range(swarmsize): # Update velocity vector if (nhm == 'dlring') or (nhm == 'slring') or \ (nhm == 'wheel') or (nhm == 'spatial') or \ (nhm == 'sfrac'): lbest_x = nhbest_x[i, :] else: lbest_x = swarm_x[:] for j in range(dimensions): if i == swarm_i: rr = rand.random() v_k[i, j] = w * v_k[i, j] + -x_k[ i, j] + swarm_x[j] + rho * (1.0 - 2.0 * rr) else: r1 = rand.random() r2 = rand.random() rc = rand.random() v_k[i, j] = w * v_k[i, j] + c1 * r1 * ( best_x[i, j] - x_k[i, j]) / dt + c2 * r2 * ( lbest_x[j] - x_k[i, j]) / dt + vcr * (1.0 - 2.0 * rc) # Check for velocity vector out of range if v_k[i, j] > vmax[j]: v_k[i, j] = vmax[j] elif v_k[i, j] < -vmax[j]: v_k[i, j] = -vmax[j] # positions update x_k[i, j] += v_k[i, j] * dt if xtype[j] == 1: x_k[i, j] = floor(x_k[i, j] + 0.5) # Check for positions out of range if x_k[i, j] > xmax[j]: x_k[i, j] = xmax[j] elif x_k[i, j] < xmin[j]: x_k[i, j] = xmin[j] if h_start: if myrank == master: [vals, hist_end] = hstfile.read([], ident=['obj', 'con']) if not hist_end: f = vals['obj'][0] g = vals['con'][0].reshape(g.shape) else: h_start = False hstfile.close() h_start = Bcast(h_start, root=0) if not h_start: # MPI Objective Function Evaluation Barrier() if myrank == master: # Send first round of particles p_ind = 0 for proc in range(1, nproc): if scale == 1: xtmp = (x_k[p_ind, :] * space_halflen) + space_centre else: xtmp = x_k[p_ind, :] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) sendbuf = [p_ind, xtmp] Send(sendbuf, proc, master_sent_tag) p_ind += 1 p_ind -= 1 # Master loop master_stop_flag = False while not master_stop_flag: recvbuf = Recv(None, MPI.ANY_SOURCE, slave_sent_tag, status) if status.tag is slave_sent_tag: f[recvbuf[0]] = recvbuf[1] g[recvbuf[0], :] = recvbuf[2][:] if p_ind == (swarmsize + nproc - 3): master_stop_flag = True # Increment the particle index p_ind += 1 # Send next particle if p_ind < swarmsize: if scale == 1: xtmp = (x_k[p_ind, :] * space_halflen) + space_centre else: xtmp = x_k[p_ind, :] sendbuf = [p_ind, xtmp] Send(sendbuf, status.source, master_sent_tag) # Send break command for proc in range(1, nproc): Send([-1], proc, break_tag) else: # Slave Evaluation Loop while 1: recvbuf = Recv(None, master, MPI.ANY_TAG, status) if status.tag == break_tag: break elif status.tag == master_sent_tag: [ff, gg] = objfunc(recvbuf[1]) # Send Results to Master sendbuf = [recvbuf[0], ff, gg] Send(sendbuf, master, slave_sent_tag) nfevals += swarmsize # Store History if sto_hst: if scale == 1: x_uns = numpy.zeros(x_k.shape) for i in range(swarmsize): x_uns[i, :] = (x_k[i, :] * space_halflen) + space_centre else: x_uns = x_k if discrete_i != []: for i in range(swarmsize): for m in discrete_i: x_uns[i, m] = floor(x_uns[i, m] + 0.5) logfile.write(x_uns, 'x') logfile.write(f, 'obj') logfile.write(g, 'con') # Augmented Lagrange for i in range(swarmsize): # Lagrangian Value L[i] = f[i] if constraints > 0: # Equality Constraints for l in 
range(neqcons): tau[i, l] = g[i, l] # Inequality Constraints for l in range(neqcons, constraints): if rp[l] != 0: if g[i, l] > -lambda_val[l] / (2 * rp[l]): tau[i, l] = g[i, l] else: tau[i, l] = -lambda_val[l] / (2 * rp[l]) else: tau[i, l] = g[i, l] for l in range(constraints): L[i] += lambda_val[l] * tau[i, l] + \ rp[l] * tau[i, l] ** 2 # Particle Best Update for i in range(swarmsize): if L[i] < best_L[i]: best_L[i] = L[i] best_f[i] = f[i] best_g[i, :] = g[i, :] best_x[i, :] = x_k[i, :] # Swarm Best Update for i in range(swarmsize): if L[i] < swarm_L: # update of the best particle and best position swarm_i = i swarm_x[:] = x_k[i, :] # update of the best objective function value found swarm_f = f[i] # update of the best constraints values found swarm_g[:] = g[i, :] # update of the swarm best L swarm_L = L[i] # Spatial Neighbourhood Update if (nhm == 'spatial') or (nhm == 'sfrac'): for i in range(swarmsize): for i2 in range(i + 1, swarmsize): pdist[i, i2] = numpy.linalg.norm(x_k[i2, :] - x_k[i, :]) for i2 in range(i): pdist[i, i2] = pdist[i2, i] if nhm == 'spatial': for i in range(swarmsize): nhps[i] = [] for nb in range(nhn): nhps[i].append(pdist[i, :].argmin()) pdist[i, nhps[i][nb]] = inf if nhs == 0: nhps[i].append(i) else: frac = ((3 * k_out) + 0.6 * maxOutIter) / maxOutIter if frac >= 1.0: nhm = 'gbest' else: for i in range(swarmsize): nhps[i] = [] d_max[i] = pdist[i, :].max() for i2 in range(swarmsize): if i == i2: if nhs == 1: pass else: nhps[i].append(i) else: if pdist[i, i2] / d_max[i] < frac: nhps[i].append(i2) # Neighbourhood Best Update if (nhm == 'dlring') or (nhm == 'slring') or (nhm == 'wheel') or \ (nhm == 'spatial') or (nhm == 'sfrac'): for i in range(swarmsize): for nbp in nhps[i]: if L[nbp] < nhbest_L[i]: nhbest_L[i] = L[nbp] nhbest_f[i] = f[nbp] nhbest_x[i, :] = x_k[nbp, :] nhbest_i[i] = nbp # Print Inner if prtInnIter != 0 and numpy.mod(k_inn, prtInnIter) == 0: # output to screen print('%d Inner Iteration of %d Outer Iteration' % (k_inn, k_out)) if (fileout == 1) or (fileout == 3): # output to filename pass # Inner Loop Convergence if k_inn >= minInnIter: if myrank == master: if swarm_L < swarm_L_old: stop_inner = 1 stop_inner = Bcast(stop_inner, root=0) # Store History if sto_hst: logfile.write(swarm_x, 'gbest_x') logfile.write(swarm_f, 'gbest_f') logfile.write(swarm_g, 'gbest_g') # Print Outer if myrank == master: if prtOutIter != 0 and numpy.mod(k_out, prtOutIter) == 0: # Output to screen print("=" * 80 + "\n") print("NUMBER OF ITERATIONS: %d\n" % k_out) print("NUMBER OF OBJECTIVE FUNCTION EVALUATIONS: %d\n" % nfevals) print("OBJECTIVE FUNCTION VALUE:") print("\tF = %.16g\n" % (float(swarm_f))) if constraints > 0: # Equality Constraints print("EQUALITY CONSTRAINTS VALUES:") for l in range(neqcons): print("\tH(%d) = %g" % (l, swarm_g[l])) # Inequality Constraints print("\nINEQUALITY CONSTRAINTS VALUES:") for l in range(neqcons, constraints): print("\tG(%d) = %g" % (l, swarm_g[l])) print("\nLAGRANGIAN MULTIPLIERS VALUES:") for l in range(constraints): print("\tL(%d) = %g" % (l, lambda_val[l])) print("\nBEST POSITION:") if scale == 1: xtmp = (swarm_x[:] * space_halflen) + space_centre else: xtmp = swarm_x[:] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) text = '' for j in range(dimensions): text += ("\tP(%d) = %.16g\t" % (j, xtmp[j])) if numpy.mod(j + 1, 3) == 0: text += "\n" print(text) print("=" * 80 + "\n") if (fileout == 1) or (fileout == 3): # Output to filename ofile.write("\n" + "=" * 80 + "\n") ofile.write("\nNUMBER OF ITERATIONS: %d\n" % k_out) 
ofile.write( "\nNUMBER OF OBJECTIVE FUNCTION EVALUATIONS: %d\n" % nfevals) ofile.write("\nOBJECTIVE FUNCTION VALUE:\n") ofile.write("\tF = %.16g\n" % (float(swarm_f))) if constraints > 0: # Equality Constraints ofile.write("\nEQUALITY CONSTRAINTS VALUES:\n") for l in range(neqcons): ofile.write("\tH(%d) = %g\n" % (l, swarm_g[l])) # Inequality Constraints ofile.write("\nINEQUALITY CONSTRAINTS VALUES:\n") for l in range(neqcons, constraints): ofile.write("\tG(%d) = %g\n" % (l, swarm_g[l])) ofile.write("\nLAGRANGIAN MULTIPLIERS VALUES:\n") for l in range(constraints): ofile.write("\tL(%d) = %g\n" % (l, lambda_val[l])) ofile.write("\nBEST POSITION:\n") if scale == 1: xtmp = (swarm_x[:] * space_halflen) + space_centre else: xtmp = swarm_x[:] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) text = '' for j in range(dimensions): text += ("\tP(%d) = %.16g\t" % (j, xtmp[j])) if numpy.mod(j + 1, 3) == 0: text += "\n" ofile.write(text) ofile.write("\n" + "=" * 80 + "\n") ofile.flush() # Store History if sto_hst and (minInnIter != maxInnIter): logfile.write(k_inn, 'ninner') if myrank == master: stop_con_num = 0 infeas_con = [] if constraints == 0: stop_constraints_flag = 1 else: for l in range(neqcons): if abs(swarm_g[l]) <= etol: stop_con_num += 1 else: infeas_con.append(l) for l in range(neqcons, constraints): if swarm_g[l] < itol: stop_con_num += 1 else: infeas_con.append(l) if stop_con_num == constraints: stop_constraints_flag = 1 else: stop_constraints_flag = 0 # Test Position and Function convergence stop_criteria_flag = 0 if stopCriteria == 1: # setting up the stopping criteria based on # distance and tolerance for k in range(stopIters - 1, 0, -1): global_distance[k] = global_distance[k - 1] global_L[k] = global_L[k - 1] global_dist = 0 for i in range(swarmsize): dist = 0 for j in range(dimensions): dist += (x_k[i, j] - swarm_x[j])**2 global_dist += (dist)**0.5 global_distance[ 0] = global_dist / swarmsize # relative extent of the swarm global_L[0] = swarm_L if (abs(global_distance[0] - global_distance[stopIters - 1]) <= dtol * abs(global_distance[stopIters - 1]) and abs(global_L[0] - global_L[stopIters - 1]) <= rtol * abs(global_L[stopIters - 1]) or abs(global_L[0] - global_L[stopIters - 1]) <= atol): stop_criteria_flag = 1 else: stop_criteria_flag = 0 # Test Convergence if stop_constraints_flag == 1 and stop_criteria_flag == 1: stop_main_flag = 1 else: stop_main_flag = 0 # Output to Summary File if (fileout == 2) or (fileout == 3): cvss = 0.0 for l in infeas_con: cvss += swarm_g[l]**2 cvL2 = cvss**0.5 if stopCriteria == 1: relL = (abs(global_L[0] - global_L[stopIters - 1]) / abs(global_L[stopIters - 1])) stext = ('%9d%8d%8d%15.4e%15f%13.4e%16.4e%14.4e\n' % (k_out, k_inn, stop_con_num, cvL2, swarm_f, swarm_L, relL, global_distance[0])) else: stext = ('%9d%8d%8d%15.4e%15f%13.4e%16s%14s\n' % (k_out, k_inn, stop_con_num, cvL2, swarm_f, swarm_L, 'NA', 'NA')) sfile.write(stext) sfile.flush() # Update Augmented Lagrangian Terms if stop_main_flag == 0: if constraints > 0: # Update new Tau for l in range(neqcons): tau_new[l] = swarm_g[l] for l in range(neqcons, constraints): if swarm_g[l] > -lambda_val[l] / (2 * rp[l]): tau_new[l] = swarm_g[l] else: tau_new[l] = -lambda_val[l] / (2 * rp[l]) # Update Lagrange Multiplier for l in range(constraints): lambda_old[l] = lambda_val[l] lambda_val[l] += 2 * rp[l] * tau_new[l] if abs(lambda_val[l]) < eps: lambda_val[l] = 0.0 # Update Penalty Factor for l in range(neqcons): if (abs(swarm_g[l]) > abs(swarm_g_old[l]) and abs(swarm_g[l]) > etol): rp[l] *= 2.0 
elif abs(swarm_g[l]) <= etol: rp[l] *= 0.5 for l in range(neqcons, constraints): if swarm_g[l] > swarm_g_old[l] and swarm_g[l] > itol: rp[l] *= 2.0 elif swarm_g[l] <= itol: rp[l] *= 0.5 # Apply Lower Bounds on rp for l in range(neqcons): if rp[l] < 0.5 * (abs(lambda_val[l]) / etol)**0.5: rp[l] = 0.5 * (abs(lambda_val[l]) / etol)**0.5 for l in range(neqcons, constraints): if rp[l] < 0.5 * (abs(lambda_val[l]) / itol)**0.5: rp[l] = 0.5 * (abs(lambda_val[l]) / itol)**0.5 for l in range(constraints): if rp[l] < 1: rp[l] = 1 for i in range(swarmsize): if constraints > 0: # Update Tau for l in range(neqcons): tau[i, l] = g[i, l] for l in range(neqcons, constraints): if g[i, l] > -lambda_val[l] / (2 * rp[l]): tau[i, l] = g[i, l] else: tau[i, l] = -lambda_val[l] / (2 * rp[l]) # set craziness velocity for next inner loop run vcr = (1 - k_out / maxOutIter) * vcrazy # update swarm with new Lagrangian function for next inner run for i in range(swarmsize): L[i] = f[i] if constraints > 0: for l in range(constraints): L[i] += lambda_val[l] * tau[i, l] + \ rp[l] * tau[i, l] ** 2 swarm_L = L[swarm_i] swarm_L_old = swarm_f_old if constraints > 0: # Equality Constraints for l in range(neqcons): tau_old[l] = swarm_g_old[l] # Inequality Constraints for l in range(neqcons, constraints): if rp[l] != 0: if swarm_g_old[l] > -lambda_val[l] / (2 * rp[l]): tau_old[l] = swarm_g_old[l] else: tau_old[l] = -lambda_val[l] / (2 * rp[l]) else: tau_old[l] = swarm_g_old[l] # for l in range(constraints): swarm_L_old += lambda_val[l] * tau_old[l] + rp[l] * \ tau_old[ l] ** 2 # reset swarm memory for next inner run for i in range(swarmsize): best_L[i] = L[i] best_f[i] = f[i] best_g[i, :] = g[i, :] best_x[i, :] = x_k[i, :] Barrier() recv_buf = Bcast(stop_main_flag, root=0) stop_main_flag = recv_buf # Print Results if myrank == master: if prtOutIter != 0: # Output to screen print("=" * 80 + "\n") print("RANDOM SEED VALUE: %.8f\n" % rseed) print("NUMBER OF ITERATIONS: %d\n" % k_out) print("NUMBER OF OBJECTIVE FUNCTION EVALUATIONS: %d\n" % nfevals) print("OBJECTIVE FUNCTION VALUE:") print("\tF = %.16g\n" % (float(swarm_f))) if constraints > 0: # Equality Constraints print("EQUALITY CONSTRAINTS VALUES:") for l in range(neqcons): print("\tH(%d) = %g" % (l, swarm_g[l])) # Inequality Constraints print("\nINEQUALITY CONSTRAINTS VALUES:") for l in range(neqcons, constraints): print("\tG(%d) = %g" % (l, swarm_g[l])) print("\nLAGRANGIAN MULTIPLIERS VALUES:") for l in range(constraints): print("\tL(%d) = %g" % (l, float(lambda_val[l]))) print("\nBEST POSITION:") if scale == 1: xtmp = (swarm_x[:] * space_halflen) + space_centre else: xtmp = swarm_x[:] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) text = '' for j in range(dimensions): text += ("\tP(%d) = %.16g\t" % (j, xtmp[j])) if numpy.mod(j + 1, 3) == 0: text += "\n" print(text) print("=" * 80 + "\n") if (fileout == 1) or (fileout == 3): ofile.close() if (fileout == 2) or (fileout == 3): # Output to Summary sfile.write("\n\nSolution:") sfile.write("\n" + "=" * 97 + "\n") sfile.write("\nNUMBER OF ITERATIONS: %d\n" % k_out) sfile.write("\nNUMBER OF OBJECTIVE FUNCTION EVALUATIONS: %d\n" % nfevals) sfile.write("\nOBJECTIVE FUNCTION VALUE:\n") sfile.write("\tF = %.16g\n" % (float(swarm_f))) if constraints > 0: # Equality Constraints sfile.write("\nEQUALITY CONSTRAINTS VALUES:\n") for l in range(neqcons): sfile.write("\tH(%d) = %g\n" % (l, swarm_g[l])) # Inequality Constraints sfile.write("\nINEQUALITY CONSTRAINTS VALUES:\n") for l in range(neqcons, constraints): sfile.write("\tG(%d) = %g\n" 
% (l, swarm_g[l])) sfile.write("\nLAGRANGIAN MULTIPLIERS VALUES:\n") for l in range(constraints): sfile.write("\tL(%d) = %g\n" % (l, float(lambda_val[l]))) sfile.write("\nBEST POSITION:\n") if scale == 1: xtmp = (swarm_x[:] * space_halflen) + space_centre else: xtmp = swarm_x[:] for m in discrete_i: xtmp[m] = floor(xtmp[m] + 0.5) text = '' for j in range(dimensions): text += ("\tP(%d) = %.16g\t" % (j, xtmp[j])) if numpy.mod(j + 1, 3) == 0: text += "\n" sfile.write(text) sfile.write("\n" + "=" * 97 + "\n") sfile.flush() sfile.close() # Results if scale == 1: opt_x = (swarm_x * space_halflen) + space_centre else: opt_x = swarm_x for m in discrete_i: opt_x[m] = floor(opt_x[m] + 0.5) opt_f = swarm_f opt_g = swarm_g opt_lambda = lambda_val[:] opt_x = Bcast(opt_x, root=0) opt_f = Bcast(opt_f, root=0) opt_g = Bcast(opt_g, root=0) opt_lambda = Bcast(opt_lambda, root=0) return opt_x, opt_f, opt_g, opt_lambda, nfevals, '%.8f' % rseed
def main():
    """ main program """
    # mpi attributes
    comm = MPI.COMM_WORLD
    stat = MPI.Status()
    rank = comm.Get_rank()
    size = comm.Get_size()
    assert 1 < size, 'script must be run in parallel: `mpiexec -np N ...`'

    # init decomp object
    decomp = decodense.DecompCls(**PARAMS)

    # master
    if rank == 0:
        # write MPI parameters
        print('\n MPI global size = {:}\n'.format(size))

        # make output dir
        if not os.path.isdir(OUTPUT):
            restart = False
            os.mkdir(OUTPUT)
        else:
            restart = True

        # load in dataset
        data = sio.loadmat(INPUT)

        # number of slaves and tasks
        n_slaves = size - 1
        n_tasks = data['R'].shape[0]

        # start_idx
        if restart:
            res_el = np.load(OUTPUT + 'elec.npy')
            res_nuc = np.load(OUTPUT + 'nuc.npy')
            start_idx = np.argmax(res_el[:, 0] == 0.)
        else:
            res_el = np.zeros([n_tasks, N_ATOMS], dtype=np.float64)
            res_nuc = np.zeros([n_tasks, N_ATOMS], dtype=np.float64)
            start_idx = 0

        # loop over molecules in data set
        for mol_idx, mol_geo in enumerate(data['R'][start_idx:], start_idx):
            # probe for available slaves
            comm.Probe(source=MPI.ANY_SOURCE, tag=1, status=stat)
            # receive slave results
            res = comm.recv(source=stat.source, tag=1)
            # retrieve results
            if res is not None:
                res_el[res['idx']] = res['prop_el']
                res_nuc[res['idx']] = res['prop_nuc']
                if res['idx'] % RST_FREQ == 0:
                    # save results
                    np.save(OUTPUT + 'elec', res_el)
                    np.save(OUTPUT + 'nuc', res_nuc)
                    # print status
                    prog = (res['idx'] + 1) / n_tasks
                    status = int(round(50 * prog))
                    remainder = (50 - status)
                    print(' STATUS: [{:}] --- {:>6.2f} %'.format(
                        '#' * status + '-' * remainder, prog * 100.))
            # send mol_dict to slave
            comm.send({'idx': mol_idx,
                       'struct': [[int(z), mol_geo[i]] for i, z in
                                  enumerate(data['Z'][mol_idx]) if 0. < z]},
                      dest=stat.source, tag=2)

        # done with all tasks
        while n_slaves > 0:
            # probe for available slaves
            comm.Probe(source=MPI.ANY_SOURCE, tag=1, status=stat)
            # receive slave results
            res = comm.recv(source=stat.source, tag=1)
            # save results
            if res is not None:
                res_el[res['idx']] = res['prop_el']
                res_nuc[res['idx']] = res['prop_nuc']
                if res['idx'] % RST_FREQ == 0:
                    np.save(OUTPUT + 'elec', res_el)
                    np.save(OUTPUT + 'nuc', res_nuc)
            # send exit signal to slave
            comm.send(None, dest=stat.source, tag=2)
            # remove slave
            n_slaves -= 1

        # save final results
        np.save(OUTPUT + 'elec', res_el)
        np.save(OUTPUT + 'nuc', res_nuc)

        # print final status
        print(' STATUS: [{:}] --- {:>6.2f} %'.format('#' * 50 + '-' * 0, 100.))

        # write final info
        with open(OUTPUT + 'info.txt', 'w') as f_info:
            f_info.write(decodense.info(decomp))

    # slaves
    else:
        # send availability to master
        comm.send(None, dest=0, tag=1)

        # receive work from master
        while True:
            # receive mol_dict
            mol_dict = comm.recv(source=0, tag=2)
            # perform task
            if mol_dict is not None:
                # init molecule
                mol = gto.M(verbose=0, output=None, unit=UNIT,
                            basis=PARAMS['basis'], atom=mol_dict['struct'])
                # decodense calc
                res = decodense.main(mol, decomp)
                # send results to master
                comm.send({'idx': mol_dict['idx'],
                           'prop_nuc': res['prop_nuc'],
                           'prop_el': res['prop_el']},
                          dest=0, tag=1)
            else:
                # exit
                break

    # barrier
    comm.Barrier()