def pathos_mp_batch_evaluator(
    func,
    arguments,
    n_cores=N_CORES,
    error_handling="continue",
    unpack_symbol=None,
):
    """Batch evaluator based on pathos.multiprocess.ProcessPool.

    This uses a patched but older version of python multiprocessing that
    replaces pickling with dill and can thus handle decorated functions.

    Args:
        func (Callable): The function that is evaluated.
        arguments (Iterable): Arguments for the functions. Their interpretation
            depends on the unpack_symbol argument.
        n_cores (int): Number of cores used to evaluate the function in
            parallel. Values below one are interpreted as one. If only one core
            is used, the batch evaluator disables everything that could cause
            problems, i.e. in that case func and arguments are never pickled
            and func is executed in the main process.
        error_handling (str): Can take the values "raise" (raise the error and
            stop all tasks as soon as one task fails) and "continue" (catch
            exceptions and set the output of failed tasks to the exception
            object without raising it). KeyboardInterrupt and SystemExit are
            always raised.
        unpack_symbol (str or None): Can be "**", "*" or None. If None, func
            just takes one argument. If "*", the elements of arguments are
            positional arguments for func. If "**", the elements of arguments
            are keyword arguments for func.

    Returns:
        list: The function evaluations.

    Raises:
        NotImplementedError: If pathos is not installed.

    """
    if not pathos_is_available:
        raise NotImplementedError(
            "To use the pathos_mp_batch_evaluator, install pathos with "
            "conda install -c conda-forge pathos."
        )

    _check_inputs(func, arguments, n_cores, error_handling, unpack_symbol)
    n_cores = int(n_cores)

    reraise = error_handling == "raise"

    @unpack(symbol=unpack_symbol)
    @catch(default="__traceback__", reraise=reraise)
    def internal_func(*args, **kwargs):
        return func(*args, **kwargs)

    # Serial path: no pool is created and nothing is pickled.
    if n_cores <= 1:
        return [internal_func(arg) for arg in arguments]

    pool = ProcessPool(nodes=n_cores)
    try:
        return pool.map(internal_func, arguments)
    except Exception:
        pool.terminate()
        # pathos caches pools per configuration; without clearing the cache a
        # later ProcessPool(nodes=n_cores) would hand back this terminated pool
        # and the next map call would fail because the pool is not running.
        pool.clear()
        # Bare raise keeps the original traceback untouched.
        raise
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a worker Pool to calculate the jobs in parallel on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False, progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not. Only stored on the instance here;
            this class does not itself configure warnings in the workers.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title
        # Keep the flag instead of silently discarding it, so it stays available to callers.
        self.show_warnings = show_warnings

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the worker pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function; bound to func as keyword arguments
        :type kwargs: dict of string to parameter

        :return: The object returned by the pool's ``imap`` call - each item should be the result of the
            application of func to a single element of partitioned_chunks.
        """
        return self.pool.imap(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Shuts the pool down (close, terminate, join) and drops it from the pool cache when possible.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
        # pathos pools (the ``nodes=`` keyword is pathos API) are cached per configuration; without
        # ``clear()`` a later distributor with the same ``n_workers`` would be handed this closed pool.
        # Guarded because the Pool implementation is imported outside this block.
        clear_pool = getattr(self.pool, "clear", None)
        if callable(clear_pool):
            clear_pool()
def parmap(f, X, nprocs=multiprocessing.cpu_count(), chunk_size=1, use_tqdm=False, **tqdm_kwargs):
    """Apply ``f`` to every element of ``X`` with a worker pool; like ``list(map(f, X))``.

    Args:
        f: The function applied to each element.
        X: Sized collection of inputs (``len(X)`` is used).
        nprocs: Requested number of processes; clamped to at least 1 and at most
            ``len(X) / chunk_size``.
        chunk_size: Passed to the pool's ``map``/``imap`` call. Reset to 1 when a
            non-default ``nprocs`` already covers all elements.
        use_tqdm: Whether to show a tqdm progress bar.
        **tqdm_kwargs: Extra keyword arguments passed to tqdm.

    Returns:
        list: ``f`` applied to each element of ``X``, in order.

    Raises:
        AssertionError: Re-raised (after printing its message) unless it is the
            "daemonic processes" error, which triggers a serial fallback.
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map

    cpu_total = multiprocessing.cpu_count()
    if nprocs != cpu_total and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != cpu_total:
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    if force_serial or nprocs == 1:  # we want it serial (maybe for profiling)
        # Honour use_tqdm on the serial path too; an explicit ``disable`` passed
        # by the caller in tqdm_kwargs still takes precedence.
        bar_kwargs = dict(tqdm_kwargs)
        bar_kwargs.setdefault("disable", not use_tqdm)
        return list(map(f, tqdm(X, smoothing=0, **bar_kwargs)))

    def _spawn_fun(item, func, c):
        # Imports are local to the task function, as in the original.
        import random
        import numpy

        # Seed from the module-level call counter ``i`` plus the element index.
        random.seed(1554 + i + c)
        numpy.random.seed(42 + i + c)
        try:
            return {"res": func(item)}
        except BaseException:
            import traceback

            # Print the traceback where the failure happened, then propagate.
            traceback.print_exc()
            raise

    old_proc_count = proc_count
    proc_count = nprocs
    try:  # try-catch hides bugs
        p = Pool(nprocs)
        p.restart(force=True)  # can throw if current proc is daemon
        try:
            funcs = [f] * len(X)
            indices = range(len(X))
            # NOTE(review): the keyword is spelled ``chunk_size`` here while
            # multiprocessing-style pools use ``chunksize`` - confirm it is honoured.
            if use_tqdm:
                retval_par = tqdm(
                    p.imap(_spawn_fun, X, funcs, indices, chunk_size=chunk_size),
                    total=len(X),
                    smoothing=0,
                    **tqdm_kwargs
                )
            else:
                retval_par = p.map(_spawn_fun, X, funcs, indices, chunk_size=chunk_size)
            retval = [res_dict["res"] for res_dict in retval_par]  # make it like the original map
        finally:
            # Stop the workers even when a task raised.
            p.terminate()
        i += 1
    except AssertionError as e:
        if str(e) == "daemonic processes are not allowed to have children":
            # can't have pool inside pool; return a list like every other path
            retval = list(map(f, X))
        else:
            print("error message is: " + str(e))
            raise  # re-raise orig exception
    finally:
        # Restore the global on every exit path, not only on success.
        proc_count = old_proc_count
    return retval
def parmap(f: Callable, X: List[object], nprocs=multiprocessing.cpu_count(), force_parallel=False, chunk_size=1,
           use_tqdm=False, keep_child_tqdm=True, **tqdm_kwargs) -> list:
    """
    Utility function for doing parallel calculations with multiprocessing.
    Splits the parameters into chunks (if wanted) and calls.
    Equivalent to list(map(func, params_iter))

    Args:
        f: The function we want to calculate for each element
        X: The parameters for the function (each element in a list)
        nprocs: The number of procs to use (defaults to all cores)
        force_parallel: Use the pool even when only one process would be used
        chunk_size: Optional, the chunk size for the workers to work on
        use_tqdm: Whether to use tqdm (default to False)
        keep_child_tqdm: Passed along with every element to the spawn function
        tqdm_kwargs: kwargs passed to tqdm

    Returns:
        The list of results after applying func to each element

    Has problems with using self.___ as variables in f (causes self to be pickled)
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map

    cpu_total = multiprocessing.cpu_count()
    if nprocs != cpu_total and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != cpu_total:
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    # Decide on the serial path before building the per-task arguments it never uses.
    if (nprocs == 1 and not force_parallel) or force_serial:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))

    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun
        # One pool task per chunk, so this is the exact progress-bar length even
        # when the last chunk is only partially filled.
        n_tasks = len(args)
    else:
        s_fun = _spawn_fun  # spawn fun
        n_tasks = len(X)

    old_proc_count = proc_count
    proc_count = nprocs
    try:  # try-catch hides bugs
        p = Pool(nprocs)
        p.restart(force=True)  # can throw if current proc is daemon
        try:
            # The lambda wrapper is kept from the original (presumably it affects
            # how the task is serialized - confirm before simplifying).
            if use_tqdm:
                retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args), total=n_tasks, **tqdm_kwargs)
            else:
                retval_par = p.map(lambda arg: s_fun(arg), args)
            retval = list(retval_par)  # make it like the original map
        finally:
            # Stop the workers even when a task raised.
            p.terminate()
        if chunk_size > 1:
            retval = flatten(retval)
        i += 1
    except AssertionError:
        # NOTE: any AssertionError lands here, not only the "daemonic processes
        # are not allowed to have children" one; the work is then redone serially.
        retval = list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))  # can't have pool inside pool
    finally:
        # Restore the global on every exit path, not only on success.
        proc_count = old_proc_count
    return retval