def pathos_mp_batch_evaluator(
    func,
    arguments,
    n_cores=N_CORES,
    error_handling="continue",
    unpack_symbol=None,
):
    """Batch evaluator based on pathos.multiprocess.ProcessPool

    This uses a patched but older fork of Python's multiprocessing that replaces
    pickle with dill and can thus handle decorated functions.

    Args:
        func (Callable): The function that is evaluated.
        arguments (Iterable): Arguments for the function. Their interpretation
            depends on the unpack_symbol argument.
        n_cores (int): Number of cores used to evaluate the function in parallel.
            Values below one are interpreted as one. If only one core is used, the
            batch evaluator disables everything that could cause problems, i.e. in that
            case func and arguments are never pickled and func is executed in the main
            process.
        error_handling (str): Can take the values "raise" (raise the error and stop all
            tasks as soon as one task fails) and "continue" (catch exceptions and set
            the output of failed tasks to the exception object without raising it).
            KeyboardInterrupt and SystemExit are always raised.
        unpack_symbol (str or None): Can be "**", "*" or None. If None, func just takes
            one argument. If "*", the elements of arguments are positional arguments for
            func. If "**", the elements of arguments are keyword arguments for func.


    Returns:
        list: The function evaluations.

    """
    if not pathos_is_available:
        raise NotImplementedError(
            "To use the pathos_mp_batch_evaluator, install pathos with "
            "conda install -c conda-forge pathos."
        )

    _check_inputs(func, arguments, n_cores, error_handling, unpack_symbol)
    n_cores = int(n_cores)

    reraise = error_handling == "raise"

    @unpack(symbol=unpack_symbol)
    @catch(default="__traceback__", reraise=reraise)
    def internal_func(*args, **kwargs):
        return func(*args, **kwargs)

    if n_cores <= 1:
        res = [internal_func(arg) for arg in arguments]
    else:
        p = ProcessPool(nodes=n_cores)
        try:
            res = p.map(internal_func, arguments)
        except Exception:
            # Stop all worker processes before propagating the error.
            p.terminate()
            raise

    return res
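# A minimal usage sketch for the batch evaluator above. It assumes the helpers it
# references (N_CORES, pathos_is_available, _check_inputs, unpack, catch, ProcessPool)
# are defined elsewhere in the same module; only the call pattern follows from the
# docstring. The worker function `square` is a hypothetical example.
if __name__ == "__main__":

    def square(x):
        return x ** 2

    # One task per element of `arguments`; unpack_symbol=None passes each element as-is.
    results = pathos_mp_batch_evaluator(
        func=square,
        arguments=[1, 2, 3, 4],
        n_cores=2,
        error_handling="continue",
        unpack_symbol=None,
    )
    print(results)  # expected: [1, 4, 9, 16]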
Example #2
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False, progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: str
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to a process pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks; each element is itself
            a list of chunks and is processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: An iterable over the results of the calculation - each item is the result of applying func
            to a single chunk.
        """
        return self.pool.imap(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Collects the result from the workers and closes the process pool.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
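# A minimal usage sketch for the distributor above, assuming a pathos-style Pool and
# partial() are bound at module level as in the class body. The data layout (a list of
# chunks, each chunk handled by one worker call) follows the distribute() docstring;
# `scale_chunk` is a hypothetical worker function.
if __name__ == "__main__":

    def scale_chunk(chunk, factor):
        return [x * factor for x in chunk]

    distributor = MultiprocessingDistributor(n_workers=2)
    try:
        partitioned = [[1, 2], [3, 4], [5, 6]]
        # kwargs are bound to the worker function via partial() inside distribute().
        results = list(distributor.distribute(scale_chunk, partitioned, {"factor": 10}))
        print(results)  # expected: [[10, 20], [30, 40], [50, 60]]
    finally:
        distributor.close()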
Example #3
def parmap(f,
           X,
           nprocs=multiprocessing.cpu_count(),
           chunk_size=1,
           use_tqdm=False,
           **tqdm_kwargs):

    if len(X) == 0:
        return []  # like map

    # nprocs = min(nprocs, cn.max_procs)
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there are enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap: too many procs requested")
        nprocs = len(X)  # more procs than items - shrink to len(X)

    if force_serial or nprocs == 1:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, smoothing=0, disable=not use_tqdm, **tqdm_kwargs)))

    def _spawn_fun(input, func, c):
        import random, numpy
        random.seed(1554 + i + c)
        numpy.random.seed(42 + i + c)  # set random seeds
        try:
            res = func(input)
            res_dict = {"res": res}
            # res_dict["functions_dict"] = function_cache2.caches_dicts
            # res_dict["experiment_purpose"] = cn2.experiment_purpose
            # res_dict["curr_params_list"] = cn2.curr_experiment_params_list
            return res_dict
        except:
            import traceback
            traceback.print_exc()
            raise  # re-raise exception

    # if chunk_size == 1:
    #     chunk_size = math.ceil(float(len(X)) / nprocs)  # all procs work on an equal chunk

    try:  # try-catch hides bugs
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        p.restart(force=True)
        # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(_spawn_fun,
                                     X, [f] * len(X),
                                     range(len(X)),
                                     chunk_size=chunk_size),
                              total=len(X),
                              smoothing=0,
                              **tqdm_kwargs)
        else:
            retval_par = p.map(_spawn_fun,
                               X, [f] * len(X),
                               range(len(X)),
                               chunk_size=chunk_size)

        retval = list(map(lambda res_dict: res_dict["res"],
                          retval_par))  # make it like the original map

        p.terminate()
        # for res_dict in retval_par:  # add all experiments params we missed
        #     curr_params_list = res_dict["curr_params_list"]
        #     for param in curr_params_list:
        #         cn.add_experiment_param(param)
        # cn.experiment_purpose = retval_par[0]["experiment_purpose"]  # use the "experiment_purpose" from the fork
        # function_cache.merge_cache_dicts_from_parallel_runs(map(lambda a: a["functions_dict"], retval_par))  # merge all
        proc_count = old_proc_count
        global i
        i += 1
    except AssertionError as e:
        if str(e) == "daemonic processes are not allowed to have children":
            retval = list(map(f, X))  # can't have pool inside pool; fall back to serial
        else:
            print("error message is: " + str(e))
            raise  # re-raise orig exception
    return retval
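# A minimal usage sketch for the parmap above. It relies on the module-level state the
# function uses (force_serial, proc_count, i, Pool, tqdm, multiprocessing) being defined
# in the original module; `slow_square` is a hypothetical worker function.
if __name__ == "__main__":

    def slow_square(x):
        return x * x

    squares = parmap(slow_square, list(range(8)), nprocs=2, chunk_size=1)
    print(squares)  # expected: [0, 1, 4, 9, 16, 25, 36, 49]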
Example #4
def parmap(f: Callable,
           X: List[object],
           nprocs=multiprocessing.cpu_count(),
           force_parallel=False,
           chunk_size=1,
           use_tqdm=False,
           keep_child_tqdm=True,
           **tqdm_kwargs) -> list:
    """
    Utility function for doing parallel calculations with multiprocessing.
    Splits the parameters into chunks (if wanted) and calls.
    Equivalent to list(map(func, params_iter))
    Args:
        f: The function we want to calculate for each element
        X: The parameters for the function (each element ins a list)
        chunk_size: Optional, the chunk size for the workers to work on
        nprocs: The number of procs to use (defaults for all cores)
        use_tqdm: Whether to use tqdm (default to False)
        tqdm_kwargs: kwargs passed to tqdm

    Returns:
        The list of results after applying func to each element

    Has problems with using self.___ as variables in f (causes self to be pickled)
    """
    if len(X) == 0:
        return []  # like map
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there are enough procs for a batch size of 1

    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap: too many procs requested")
        nprocs = len(X)  # more procs than items - shrink to len(X)

    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun
    else:
        s_fun = _spawn_fun  # spawn fun

    if (nprocs == 1 and not force_parallel) or force_serial:
        # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))

    try:  # try-catch hides bugs
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        p.restart(force=True)
        # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args),
                              total=int(len(X) / chunk_size),
                              **tqdm_kwargs)
        else:
            # import  pdb
            # pdb.set_trace()
            retval_par = p.map(lambda arg: s_fun(arg), args)

        retval = list(retval_par)  # make it like the original map
        if chunk_size > 1:
            retval = flatten(retval)

        p.terminate()
        proc_count = old_proc_count
        global i
        i += 1
    except AssertionError as e:
        # if e == "daemonic processes are not allowed to have children":
        retval = list(map(f,
                          tqdm(X, disable=not use_tqdm,
                               **tqdm_kwargs)))  # can't have pool inside pool
    return retval
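# A minimal usage sketch for the chunked parmap variant above. It relies on the helpers
# and globals it references (chunk_iterator, flatten, _spawn_fun, _chunk_spawn_fun,
# force_serial, proc_count, i, Pool) being defined in the original module; `negate` is a
# hypothetical worker function.
if __name__ == "__main__":

    def negate(x):
        return -x

    # chunk_size > 1 groups the arguments so each worker call processes a whole chunk,
    # which amortizes the per-task overhead for cheap functions.
    out = parmap(negate, list(range(10)), nprocs=2, chunk_size=5)
    print(out)  # expected: [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]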