def test_10_write_starvation(self):
    """Check that a write query is not starved by a flood of read queries.

    The workload mimics the sequence R, W, R, R, R, ...: if writes were
    starved, the single write query would have to wait for all queued
    read queries to complete while holding the Redis global lock, which
    would hurt performance. The test counts how many queries report a
    timestamp earlier than the write query's timestamp and requires that
    count to be at most 30% of the batch.
    """
    self.graph = Graph(self.conn, GRAPH_ID)
    workers = Pool(nodes=CLIENT_COUNT)

    read_query = "UNWIND range(0, 10000) AS x WITH x WHERE x = 9999 RETURN 'R', timestamp()"
    write_query = "UNWIND range(0, 1000) AS x WITH x WHERE x = 27 CREATE ({v:1}) RETURN 'W', timestamp()"
    slow_query = "UNWIND range(0, 100000) AS x WITH x WHERE (x % 73) = 0 RETURN count(1)"

    # Fire a batch of slow queries first and do not wait for them: they buy
    # time to fill up RedisGraph's internal threadpool queue.
    warmup_size = CLIENT_COUNT * 5
    workers.imap(thread_run_query, [slow_query] * warmup_size, [None] * warmup_size)

    # A long run of read queries with a single write query injected close to
    # the beginning of the sequence.
    batch_size = CLIENT_COUNT * 10
    queries = [read_query] * batch_size
    queries[CLIENT_COUNT] = write_query

    # Run the whole batch in parallel and wait for every result.
    results = workers.map(thread_run_query, queries, [None] * batch_size)

    # Each result row is (label, timestamp); count the queries whose
    # timestamp precedes the write query's.
    write_ts = results[CLIENT_COUNT]["result_set"][0][1]
    finished_before_write = sum(
        1 for result in results if result["result_set"][0][1] < write_ts
    )

    # The write query must not have been starved.
    self.env.assertLessEqual(finished_before_write, len(queries) * 0.3)

    # Delete the key.
    self.conn.delete(GRAPH_ID)
def avaliacao(populacao):
    """Score every individual of ``populacao`` in parallel.

    For individual ``k`` a zero matrix shaped like ``R`` is built, the
    entries selected by ``ql.get_states`` are filled with row ``k`` of
    ``valores(populacao)``, and ``ql.move(e, Q=...)`` is summed over the
    distinct values of ``ql.get_states[0]``.

    Returns the negated per-individual sums as an integer array.
    NOTE(review): ``ql.move`` presumably returns a step count, so a larger
    (less negative) score would mean fewer steps -- confirm.
    """
    x = valores(populacao)
    start_states = list(set(ql.get_states[0]))

    def steps(k):
        # Q matrix for individual k: zeros except at the known states.
        q_matrix = zeros(R.shape)
        q_matrix[ql.get_states] = x[k, :]
        return sum(ql.move(state, Q=q_matrix) for state in start_states)

    workers = Pool(nodes=12)
    totals = list(workers.imap(steps, range(len(populacao))))
    return -array(totals).astype(int)
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor that runs the jobs in parallel on the local machine through a worker ``Pool``.
    """

    def __init__(self, n_workers, disable_progressbar=False, progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Create the worker pool and remember the progress bar settings.

        :param n_workers: number of workers (``nodes``) the pool is created with.
        :type n_workers: int
        :param disable_progressbar: stored as ``self.disable_progressbar``.
        :type disable_progressbar: bool
        :param progressbar_title: stored as ``self.progressbar_title``.
        :type progressbar_title: basestring
        :param show_warnings: accepted for interface compatibility; this class does not use it.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.progressbar_title = progressbar_title
        self.disable_progressbar = disable_progressbar
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Map ``func`` over the chunks with the pool's ``imap``.

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: the data chunks; each element is passed to one call of ``func``.
        :type partitioned_chunks: iterable
        :param kwargs: keyword arguments bound to ``func`` for every call.
        :type kwargs: dict of string to parameter

        :return: whatever ``pool.imap`` returns for ``func`` (with ``kwargs`` bound) over the chunks.
        """
        bound_func = partial(func, **kwargs)
        return self.pool.imap(bound_func, partitioned_chunks)

    def close(self):
        """
        Shut the pool down: close it, terminate it, then join it.
        """
        pool = self.pool
        pool.close()
        pool.terminate()
        pool.join()
def test_mp():
    """Check that the pool's blocking, iterator and asynchronous maps agree with the builtin ``map``."""
    from pathos.pools import ProcessPool

    # instantiate and configure the worker pool
    pool = ProcessPool(nodes=4)

    bases, exponents = [1, 2, 3, 4], [5, 6, 7, 8]
    expected = list(map(pow, bases, exponents))

    # blocking map
    assert pool.map(pow, bases, exponents) == expected

    # non-blocking map: the results are pulled out of the returned iterator
    assert list(pool.imap(pow, bases, exponents)) == expected

    # asynchronous map: the results are fetched with get()
    assert pool.amap(pow, bases, exponents).get() == expected
def test_mp():
    """Compare ``map``, ``imap`` and ``amap`` of a 4-node ProcessPool against a serial reference."""
    from pathos.pools import ProcessPool

    # worker pool under test
    workers = ProcessPool(nodes=4)

    xs = [1, 2, 3, 4]
    ys = [5, 6, 7, 8]
    reference = [pow(x, y) for x, y in zip(xs, ys)]

    # blocking map returns the results directly
    blocking = workers.map(pow, xs, ys)
    assert blocking == reference

    # imap returns an iterator that has to be drained
    lazy = workers.imap(pow, xs, ys)
    assert list(lazy) == reference

    # amap returns a handle whose get() yields the results
    pending = workers.amap(pow, xs, ys)
    assert pending.get() == reference
def run_multiple(func, *args, argsList=None, kwargsList=None, messageFn=None, serial=False):
    """
    Makes use of the pathos.multiprocessing module to run a function simultaneously multiple times.
    This is meant mainly to update multiple plots at the same time, which can accelerate significantly the process of visualizing data.

    All arguments passed to the function, except func, can be passed as specified in the arguments section of this documentation
    or as a list containing multiple instances of them.
    If a list is passed, each time the function needs to be run it will take the next item of the list.
    If a single item is passed instead, this item will be repeated for each function run.
    However, at least one argument must be a list, so that the number of times that the function has to be run is defined.
    If several lists of different lengths are passed, the function is run as many times as the shortest one allows.

    Arguments
    ----------
    func: function
        The function to be executed. It has to be prepared to receive the arguments as they are provided to it (zipped),
        i.e. as a single tuple ``(*args items, argsList item, kwargsList item)``.

        See the applyMethod() function as an example.
    *args:
        Contains all the arguments that are specific to the individual function that we want to run.
        See each function separately to understand what you need to pass (you may not need this parameter).
    argsList: array-like
        An array of arguments that have to be passed to the executed function.

        Can also be a list of arrays (see this function's description).

        WARNING: Currently it only works properly for a list of arrays. Didn't fix this because of the lack of interest
        of argsList on Plot's methods (everything is passed as keyword arguments).
    kwargsList: dict
        A dictionary with the keyword arguments that have to be passed to the executed function.

        If the executed function is a Plot's method, these can be the settings, for example.

        Can also be a list of dicts (see this function's description).
    messageFn: function
        Function that receives the number of tasks and nodes and needs to return a string to display as a description of the progress bar.
    serial: bool
        If set to true, multiprocessing is not used.

        This seems to have little sense, but it is useful to switch easily between multiprocessing and serial with the same code.

    Returns
    ----------
    results: list
        A list with all the returned values or objects from each function execution.
        This list is ordered, so results[0] is the result of executing the function with argsList[0] and kwargsList[0].

    Raises
    ----------
    ValueError
        If no argument is a list, tuple or numpy array, because then the number of runs is undefined.
    """
    # Prepare the arguments: sequences are consumed item by item, anything
    # else is repeated for every run.
    toZip = [*args, argsList, kwargsList]
    lengths = []
    for i, arg in enumerate(toZip):
        if isinstance(arg, (list, tuple, np.ndarray)):
            lengths.append(len(arg))
        else:
            toZip[i] = itertools.repeat(arg)

    # Without a sequence every zipped argument is an infinite repeat, so
    # there is no way to know when to stop.
    if not lengths:
        raise ValueError(
            "run_multiple needs at least one argument passed as a list, tuple or "
            "numpy array to know how many times to run the function."
        )

    # zip() stops at the shortest sequence, so that is the real task count.
    nTasks = min(lengths)

    # Run things in serial mode in case it is demanded or there is nothing to
    # parallelize (a pool with zero processes cannot be created).
    serial = serial or nTasks <= 1 or _MAX_NPROCS == 1
    if serial:
        return [func(argsTuple) for argsTuple in zip(*toZip)]

    # Create a pool with the appropriate number of processes
    pool = Pool(min(nTasks, _MAX_NPROCS))
    try:
        # Initialize the pool iterator and the progress bar that controls it
        progress = tqdm.tqdm(pool.imap(func, zip(*toZip)), total=nTasks)

        # Set a description for the progress bar
        if callable(messageFn):
            message = messageFn(nTasks, pool.nodes)
        else:
            message = "Updating {} plots in {} processes".format(nTasks, pool.nodes)
        progress.set_description(message)

        # Run the processes and collect the results in order
        results = list(progress)
    finally:
        # Release the pool even if one of the runs raised
        pool.close()
        pool.join()
        pool.clear()

    return results
class ReMap(ReIterBase):
    def __init__(self, fn, iterable_input, proc_type=None, n_proc=1, per_proc_buffer=1, ordered=True, name='reMap',
                 verbose=True):
        """
        A map over ``iterable_input`` that can be iterated over more than once.

        Parameters
        ----------
        fn
            function applied to every item
        iterable_input
            iterable input
        proc_type
            'sub', 'proc' or 'subprocess': map through a pathos ProcessPool
            'thread' or 'th': map through a ThreadPool
            anything else: use the regular builtin map
        n_proc
            number of workers in a pool (ignored if no pool)
        per_proc_buffer
            the pool's map functions do not limit how much input they consume, so items are handed over in
            batches of ``per_proc_buffer * n_proc`` and each batch is fully yielded before the next is read
        ordered
            when true results are yielded in input order, otherwise the pool's unordered map is used
        name
            name to use for logging messages; the pool type is appended when a pool is used
        verbose
            passed on to the base class
        """
        uses_pool = proc_type in ('sub', 'proc', 'subprocess', 'th', 'thread')
        suffix = ' ' + proc_type if uses_pool else ''
        name = name + suffix
        super().__init__(iterable_input=iterable_input, name=name, verbose=verbose)
        self.fn = fn
        self.proc_type = proc_type
        self.per_proc_buffer = per_proc_buffer
        self.n_proc = n_proc
        self.ordered = ordered

    def _iter(self):
        def batched_results(run_batch):
            # Workaround for limiting how far ahead the pool consumes the
            # input: collect a batch, map it, yield all of it, repeat.
            pending = []
            for item in self.iterable_input:
                pending.append(item)
                if len(pending) >= self.per_proc_buffer * self.n_proc:
                    for result in run_batch(pending):
                        yield result
                    pending = []
            # feed whatever is left once the input is exhausted
            for result in run_batch(pending):
                yield result

        if self.proc_type in ('thread', 'th') and self.n_proc > 0:
            with ThreadPool(self.n_proc) as p:
                def run_in_threads(batch):
                    if self.ordered:
                        return p.imap(self.fn, batch)
                    return p.imap_unordered(self.fn, batch)

                for itm in batched_results(run_in_threads):
                    yield itm

        elif self.proc_type in ('sub', 'proc', 'subprocess') and self.n_proc > 0:
            # A pool left over from a previous iteration has to be torn down
            # explicitly before a new one is created.
            try:
                log.info("Trying to terminate previous pool")
                self.pool.terminate()
                self.pool.clear()
                log.info("Yay! Cleared previous process pool")
            except AttributeError:
                log.warning("Is this the first time creating a pool...")

            self.pool = ProcessPool(nodes=self.n_proc)

            def run_in_processes(batch):
                # self.pool is looked up for every batch on purpose
                if self.ordered:
                    return self.pool.imap(self.fn, batch)
                return self.pool.uimap(self.fn, batch)

            for itm in batched_results(run_in_processes):
                yield itm

        else:
            for itm in map(self.fn, self.iterable_input):
                yield itm
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
# - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

# Example: compare the three map flavours of a pathos ProcessPool (blocking,
# iterator, asynchronous) against the builtin map.

from pathos.helpers import freeze_support
freeze_support()

# instantiate and configure the worker pool
from pathos.pools import ProcessPool
pool = ProcessPool(nodes=4)

# Reference result. The builtin map is lazy on Python 3, so it has to be
# materialised; a bare map object never compares equal to a list.
_result = list(map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

# do a blocking map on the chosen function
result = pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
result = result_queue.get()
assert result == _result
import pickle
import sys

import dill
from pathos.pools import ProcessPool

# Worker script: load a (function, argument tuples) pair from
# data_<id>.pkl, apply the function to every argument tuple on a process
# pool and write the list of results to results_<id>.pkl.
#
# Usage: <script> <id_number> <num_cores>

id_number, num_cores = sys.argv[1:3]
input_path = 'data_{}.pkl'.format(id_number)
output_path = 'results_{}.pkl'.format(id_number)

# NOTE: dill.load can execute arbitrary code from the file; only feed this
# script job files from a trusted source.
with open(input_path, 'rb') as fp:
    func, args_set = dill.load(fp)

# args_set holds one argument tuple per call; imap wants one iterable per
# positional argument, hence the transpose.
# Serial equivalent: [func(*args) for args in args_set]
pool = ProcessPool(nodes=int(num_cores))
argument_columns = zip(*args_set)
results = list(pool.imap(func, *argument_columns))

with open(output_path, 'wb') as fp:
    pickle.dump(results, fp)
def parmap(f, X, nprocs=multiprocessing.cpu_count(), chunk_size=1, use_tqdm=False, **tqdm_kwargs):
    """Parallel equivalent of ``list(map(f, X))``.

    Args:
        f: function applied to each element of ``X``.
        X: sized collection of inputs (``len(X)`` is used).
        nprocs: requested number of processes; it is capped so that there is
            at least one chunk of work per process and never drops below 1.
        chunk_size: forwarded to the pool's map call as ``chunk_size``; reset
            to 1 when a non-default ``nprocs`` offers enough processes for one
            element each.
        use_tqdm: show a tqdm progress bar.
        **tqdm_kwargs: extra keyword arguments for tqdm.

    Returns:
        A list with ``f`` applied to each element of ``X``, in input order.

    The call runs serially when the module-level ``force_serial`` flag is set
    or only one process would be used. If the pool cannot be started because
    the current process is daemonic (a pool inside a pool), the work is done
    serially instead. Any other ``AssertionError`` is printed and re-raised.

    Side effects: the module-level ``proc_count`` is set to the number of
    processes while the pool is running and restored afterwards; the
    module-level counter ``i`` is incremented after each successful parallel
    run so that consecutive runs seed the workers' random generators
    differently.
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map

    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    if force_serial or nprocs == 1:  # we want it serial (maybe for profiling)
        # The bar follows use_tqdm here as well; explicit tqdm_kwargs win.
        bar_kwargs = {"smoothing": 0, "disable": not use_tqdm, **tqdm_kwargs}
        return list(map(f, tqdm(X, **bar_kwargs)))

    def _spawn_fun(item, func, c):
        """Run ``func(item)`` in a worker after seeding its random generators."""
        import random
        import numpy
        # A distinct, reproducible seed per task (c) and per parmap run (i).
        random.seed(1554 + i + c)
        numpy.random.seed(42 + i + c)
        try:
            return {"res": func(item)}
        except BaseException:
            # Print the traceback where the failure happened, then propagate.
            import traceback
            traceback.print_exc()
            raise

    old_proc_count = proc_count
    proc_count = nprocs
    p = None
    try:
        p = Pool(nprocs)
        p.restart(force=True)  # can throw if current proc is daemon
        funcs = [f] * len(X)
        task_ids = range(len(X))
        if use_tqdm:
            retval_par = tqdm(p.imap(_spawn_fun, X, funcs, task_ids, chunk_size=chunk_size),
                              total=len(X), smoothing=0, **tqdm_kwargs)
        else:
            retval_par = p.map(_spawn_fun, X, funcs, task_ids, chunk_size=chunk_size)

        retval = [res_dict["res"] for res_dict in retval_par]  # make it like the original map
        i += 1
    except AssertionError as e:
        if str(e) != "daemonic processes are not allowed to have children":
            print("error message is: " + str(e))
            raise  # re-raise orig exception
        retval = list(map(f, X))  # can't have pool inside pool
    finally:
        # Always undo the global bookkeeping and release the workers.
        proc_count = old_proc_count
        if p is not None:
            p.terminate()

    return retval
def parmap(f: Callable, X: List[object], nprocs=multiprocessing.cpu_count(), force_parallel=False, chunk_size=1,
           use_tqdm=False, keep_child_tqdm=True, **tqdm_kwargs) -> list:
    """
    Utility function for doing parallel calculations with multiprocessing.
    Splits the parameters into chunks (if wanted) and maps ``f`` over them.
    Equivalent to ``list(map(f, X))``.

    Args:
        f: The function we want to calculate for each element
        X: The parameters for the function (each element is one input)
        nprocs: The number of procs to use (defaults to all cores); capped so
            that every process gets at least one chunk
        force_parallel: Use the pool even when only one process would be used
        chunk_size: Optional, the chunk size for the workers to work on
        use_tqdm: Whether to use tqdm (default to False)
        keep_child_tqdm: Passed along to the spawn function with every element
        tqdm_kwargs: kwargs passed to tqdm

    Returns:
        The list of results after applying ``f`` to each element

    The call runs serially when the module-level ``force_serial`` flag is set,
    or when one process would be used and ``force_parallel`` is false.
    If the parallel run raises an ``AssertionError`` (e.g. a pool cannot be
    started inside a daemonic process), the whole input is processed serially
    instead.

    Side effects: the module-level ``proc_count`` is set to the number of
    processes while the pool is running and restored afterwards; the
    module-level counter ``i`` is incremented after each successful parallel
    run.

    Has problems with using self.___ as variables in f (causes self to be pickled)
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map

    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    if (nprocs == 1 and not force_parallel) or force_serial:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))

    # Only the parallel path needs the per-element argument tuples.
    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun
    else:
        s_fun = _spawn_fun  # spawn fun

    old_proc_count = proc_count
    proc_count = nprocs
    p = None
    try:
        p = Pool(nprocs)
        p.restart(force=True)  # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args), total=int(len(X) / chunk_size), **tqdm_kwargs)
        else:
            retval_par = p.map(lambda arg: s_fun(arg), args)

        retval = list(retval_par)  # make it like the original map
        if chunk_size > 1:
            retval = flatten(retval)
        i += 1
    except AssertionError:
        # Deliberate best-effort: fall back to a serial run, e.g. because a
        # pool cannot be created inside a pool. NOTE(review): this also hides
        # AssertionErrors raised for other reasons.
        retval = list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))
    finally:
        # Always undo the global bookkeeping and release the workers.
        proc_count = old_proc_count
        if p is not None:
            p.terminate()

    return retval
# Select the swept variable and the simulation entry point for the
# requested problem.
if args.problem == "perf":
    print("Starting performance simulation:")
    print("Pmax = {0}".format(Pmax.tolist()))
    var, sim_call = Pmax, perf_call
elif args.problem == "outdated":
    print("Starting outdated CSI simulation:")
    print("Update frequencies = {0}".format(update_freq.tolist()))
    var, sim_call = update_freq, outdated_call

# Run every generated input on the process pool, with a progress bar over
# len(var) * num_iter expected results.
inputs = gen_inputs(var, num_iter)
pool = ProcessPool(args.num_procs)
simulations = pool.imap(sim_call, *zip(*inputs))
results = list(tqdm(simulations, total=len(var) * num_iter))
pool.close()

sum_rate, jain_idx = reduce_outputs(var, num_iter, results)
print("Job done!")

# Output directory name: algorithm-date-solver-problem-R<R>, with a
# -doppler<doppler> suffix for the outdated CSI problem.
date = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
name_parts = [algorithm, date, solver, args.problem, "R" + str(R)]
if args.problem == "outdated":
    name_parts.append("doppler" + str(doppler))
dir_name = "-".join(name_parts)

if not data_path.exists():
    print("Creating data directory")
    data_path.mkdir()
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
# - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

# Example: run the same computation through the blocking, iterator and
# asynchronous maps of a pathos ProcessPool and check each against the
# builtin map.

from pathos.helpers import freeze_support
freeze_support()

# instantiate and configure the worker pool
from pathos.pools import ProcessPool
pool = ProcessPool(nodes=4)

# Materialise the reference: on Python 3 the builtin map returns a lazy
# iterator, which would never compare equal to the lists checked below.
_result = list(map(pow, [1,2,3,4], [5,6,7,8]))

# do a blocking map on the chosen function
result = pool.map(pow, [1,2,3,4], [5,6,7,8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1,2,3,4], [5,6,7,8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1,2,3,4], [5,6,7,8])
result = result_queue.get()
assert result == _result