Python ProcessPool.imapの例、pathos.pools.ProcessPool.imap Pythonの例

コード例 #1

0

ファイルを表示

    def test_10_write_starvation(self):
        # Verify that a write query is not starved by surrounding reads.
        #
        # Scenario under test: a long run of read queries with a single
        # write query mixed in near the front, i.e.
        #     R, W, R, R, R, R, R, R, R...
        # If the write were starved it would have to wait for every queued
        # read to finish while holding the Redis global lock, which would
        # hurt performance. The test submits such a sequence and checks
        # that only a bounded share of the queries finished before the write.

        self.graph = Graph(self.conn, GRAPH_ID)
        pool = Pool(nodes=CLIENT_COUNT)

        read_query = "UNWIND range(0, 10000) AS x WITH x WHERE x = 9999 RETURN 'R', timestamp()"
        write_query = "UNWIND range(0, 1000) AS x WITH x WHERE x = 27 CREATE ({v:1}) RETURN 'W', timestamp()"
        slow_query = "UNWIND range(0, 100000) AS x WITH x WHERE (x % 73) = 0 RETURN count(1)"

        # Kick off a batch of slow queries first (fire-and-forget); this buys
        # time to fill up RedisGraph's internal threadpool queue.
        warmup_size = CLIENT_COUNT * 5
        pool.imap(thread_run_query,
                  [slow_query] * warmup_size,
                  [None] * warmup_size)

        # Long sequence of reads with one write injected close to the start.
        batch_size = CLIENT_COUNT * 10
        queries = [read_query] * batch_size
        queries[CLIENT_COUNT] = write_query

        # Run the whole sequence in parallel and wait for all results.
        results = pool.map(thread_run_query, queries, [None] * batch_size)

        # The second column of the first row of each result is the timestamp
        # returned by the query; count the queries stamped before the write.
        write_ts = results[CLIENT_COUNT]["result_set"][0][1]
        count = sum(1 for result in results
                    if result["result_set"][0][1] < write_ts)

        # The write must not have been pushed behind most of the sequence.
        self.env.assertLessEqual(count, len(queries) * 0.3)

        # Clean up: delete the graph key.
        self.conn.delete(GRAPH_ID)

コード例 #2

0

ファイルを表示

ファイル: qmatrix.py プロジェクト: duducosmos/algoritmogenetico

    def avaliacao(populacao):
        """
        Evaluate every individual of ``populacao`` in parallel.

        For individual ``k`` a fresh zero matrix shaped like ``R`` is filled at
        ``ql.get_states`` with row ``k`` of ``valores(populacao)``; the score is
        the sum of ``ql.move(state, Q=...)`` over the distinct entries of
        ``ql.get_states[0]``.

        :return: the negated scores, cast to int, one per individual.
        """
        x = valores(populacao)
        n = len(populacao)

        # Distinct values found in the first component of ql.get_states.
        estados = list(set(ql.get_states[0]))

        def total_steps(indice):
            # Build the Q matrix encoded by this individual.
            q_matrix = zeros(R.shape)
            q_matrix[ql.get_states] = x[indice, :]
            return sum([ql.move(estado, Q=q_matrix) for estado in estados])

        pool = Pool(nodes=12)
        passos = array(list(pool.imap(total_steps, range(n))))
        return -passos.astype(int)

コード例 #3

0

ファイルを表示

ファイル: distribution.py プロジェクト: joshuadebellis/tsfresh

class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False, progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not. The value is only stored on the
            instance; this class does not forward it to the workers.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title
        # This argument used to be accepted and silently dropped; keep it so
        # the requested setting remains inspectable on the instance.
        self.show_warnings = show_warnings

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the worker pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: keyword arguments bound to ``func`` (via ``functools.partial``) before mapping
        :type kwargs: dict of string to parameter

        :return: The iterator returned by the pool's ``imap`` - each item is the result of applying
            ``func`` to a single element of ``partitioned_chunks``. Results are produced as the
            iterator is consumed, not collected into a list up front.
        """
        return self.pool.imap(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Shuts the worker pool down: closes it, terminates the workers and joins them.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()

コード例 #4

0

ファイルを表示

def test_mp():
    """Check that the blocking, iterator and asynchronous maps of a
    ProcessPool all agree with the builtin ``map`` for ``pow``."""
    from pathos.pools import ProcessPool

    # worker pool with four nodes
    workers = ProcessPool(nodes=4)

    bases = [1, 2, 3, 4]
    exponents = [5, 6, 7, 8]
    expected = [pow(base, exponent) for base, exponent in zip(bases, exponents)]

    # blocking map
    blocking = workers.map(pow, bases, exponents)
    assert blocking == expected

    # non-blocking map: results are pulled out of the returned iterator
    lazy = workers.imap(pow, bases, exponents)
    assert list(lazy) == expected

    # asynchronous map: results are fetched from the returned handle
    pending = workers.amap(pow, bases, exponents)
    assert pending.get() == expected

コード例 #5

0

ファイルを表示

def test_mp():
    """Exercise ``map``, ``imap`` and ``amap`` on a four-node ProcessPool
    and compare each against the serial result."""
    from pathos.pools import ProcessPool

    # set up the worker pool
    pool = ProcessPool(nodes=4)

    xs, ys = [1, 2, 3, 4], [5, 6, 7, 8]
    reference = list(map(pow, xs, ys))

    # (label, callable producing a list of results) for each flavour of map
    flavours = (
        ("map", lambda: pool.map(pow, xs, ys)),          # blocking
        ("imap", lambda: list(pool.imap(pow, xs, ys))),  # iterator
        ("amap", lambda: pool.amap(pow, xs, ys).get()),  # asynchronous
    )
    for _label, compute in flavours:
        assert compute() == reference

コード例 #6

0

ファイルを表示

ファイル: plotutils.py プロジェクト: sofiasanz/sisl

def run_multiple(func,
                 *args,
                 argsList=None,
                 kwargsList=None,
                 messageFn=None,
                 serial=False):
    """
    Makes use of the pathos.multiprocessing module to run a function simultaneously multiple times.
    This is meant mainly to update multiple plots at the same time, which can accelerate significantly the process of visualizing data.

    All arguments passed to the function, except func, can be passed as specified in the arguments section of this documentation
    or as a list containing multiple instances of them.
    If a list is passed, each time the function needs to be run it will take the next item of the list.
    If a single item is passed instead, this item will be repeated for each function run.
    However, at least one argument must be a list, so that the number of times that the function has to be run is defined.

    Arguments
    ----------
    func: function
        The function to be executed. It is called with ONE positional argument: a tuple
        ``(*args_i, argsList_i, kwargsList_i)`` holding the i-th item of every argument (zipped).

        See the applyMethod() function as an example.
    *args:
        Contains all the arguments that are specific to the individual function that we want to run.
        See each function separately to understand what you need to pass (you may not need this parameter).
    argsList: array-like
        An array of arguments that have to be passed to the executed function.

        Can also be a list of arrays (see this function's description).

        WARNING: Currently it only works properly for a list of arrays. Didn't fix this because the lack of interest
        of argsList on Plot's methods (everything is passed as keyword arguments).
    kwargsList: dict
        A dictionary with the keyword arguments that have to be passed to the executed function.

        If the executed function is a Plot's method, these can be the settings, for example.

        Can also be a list of dicts (see this function's description).

    messageFn: function
        Function that receives the number of tasks and nodes and needs to return a string to display as a description of the progress bar.
    serial: bool
        If set to true, multiprocessing is not used.

        This seems to have little sense, but it is useful to switch easily between multiprocessing and serial with the same code.

    Returns
    ----------
    results: list
        A list with all the returned values or objects from each function execution.
        This list is ordered, so results[0] is the result of executing the function with argsList[0] and kwargsList[0].
        Its length is that of the shortest list-like argument (arguments are zipped), in both serial and parallel mode.

    Raises
    ----------
    ValueError
        If none of the arguments is a list, tuple or numpy array, because then the number of runs is undefined.
    """

    # Prepare the arguments: list-like ones are consumed item by item, any
    # other value is repeated for every run.
    toZip = [*args, argsList, kwargsList]
    lengths = []
    for i, arg in enumerate(toZip):
        if isinstance(arg, (list, tuple, np.ndarray)):
            lengths.append(len(arg))
        else:
            toZip[i] = itertools.repeat(arg)

    # Without a list-like argument every entry is an infinite repeat, so
    # zipping them would never terminate; fail loudly instead.
    if not lengths:
        raise ValueError(
            "run_multiple needs at least one list, tuple or array argument "
            "to define how many times the function is run")

    # zip() stops at the shortest input, so that is the real number of tasks.
    nTasks = min(lengths)
    if nTasks == 0:
        return []

    # Run things in serial mode in case it is demanded
    serial = serial or _MAX_NPROCS == 1 or nTasks == 1
    if serial:
        return [func(argsTuple) for argsTuple in zip(*toZip)]

    # Create a pool with the appropriate number of processes
    pool = Pool(min(nTasks, _MAX_NPROCS))
    try:
        # Initialize the pool iterator and the progress bar that controls it
        progress = tqdm.tqdm(pool.imap(func, zip(*toZip)), total=nTasks)

        # Set a description for the progress bar
        if callable(messageFn):
            message = messageFn(nTasks, pool.nodes)
        else:
            message = "Updating {} plots in {} processes".format(
                nTasks, pool.nodes)
        progress.set_description(message)

        # Consuming the progress bar runs the tasks; imap keeps input order.
        results = list(progress)
    finally:
        # Always release the pool, even if a task raised.
        pool.close()
        pool.join()
        pool.clear()

    return results

コード例 #7

0

ファイルを表示

class ReMap(ReIterBase):
    def __init__(self,
                 fn,
                 iterable_input,
                 proc_type=None,
                 n_proc=1,
                 per_proc_buffer=1,
                 ordered=True,
                 name='reMap',
                 verbose=True):
        """
        A map over ``iterable_input`` that can be iterated more than once.

        Parameters
        ----------
        fn
            function applied to every item
        iterable_input
            iterable input

        proc_type
            'sub' / 'proc' / 'subprocess': map through a pathos ProcessPool
            'thread' / 'th': map through a ThreadPool
            anything else: plain builtin map

        n_proc
            number of workers in a pool (ignored if no pool)

        per_proc_buffer
            the pool's map does not limit how much input it consumes, so input
            is fed in batches of ``per_proc_buffer * n_proc`` items and every
            batch is fully yielded before the next one is read

        ordered
            ordered map by default; otherwise the unordered variant is used
            (`imap_unordered` for threads, `uimap` for processes)

        name
            name to use for logging messages; the pool type is appended to it
            when a pool is used

        verbose
            passed through to the base class
        """
        if proc_type in ('sub', 'proc', 'subprocess', 'th', 'thread'):
            name = name + ' ' + proc_type
        super().__init__(iterable_input=iterable_input,
                         name=name,
                         verbose=verbose)
        self.fn = fn
        self.proc_type = proc_type
        self.per_proc_buffer = per_proc_buffer
        self.n_proc = n_proc
        self.ordered = ordered

    def _iter(self):
        def mapped_in_batches(ordered_map, unordered_map):
            # Workaround for limiting how far ahead the input iterator is
            # consumed: collect a batch, map it, yield all of it, repeat.
            pending = []
            for element in self.iterable_input:
                pending.append(element)
                if len(pending) >= self.per_proc_buffer * self.n_proc:
                    mapper = ordered_map if self.ordered else unordered_map
                    yield from mapper(self.fn, pending)
                    pending = []

            # whatever is left once the input is exhausted
            mapper = ordered_map if self.ordered else unordered_map
            yield from mapper(self.fn, pending)

        wants_threads = self.proc_type in ('thread', 'th')
        wants_procs = self.proc_type in ('sub', 'proc', 'subprocess')

        if wants_threads and self.n_proc > 0:
            with ThreadPool(self.n_proc) as p:
                yield from mapped_in_batches(p.imap, p.imap_unordered)

        elif wants_procs and self.n_proc > 0:
            # A pool left over from a previous iteration has to be torn down
            # explicitly before a new one is created.
            try:
                log.info("Trying to terminate previous pool")
                self.pool.terminate()
                self.pool.clear()
                log.info("Yay! Cleared previous process pool")
            except AttributeError:
                log.warning("Is this the first time creating a pool...")

            self.pool = ProcessPool(nodes=self.n_proc)
            yield from mapped_in_batches(self.pool.imap, self.pool.uimap)

        else:
            # no pool requested: plain serial map
            yield from map(self.fn, self.iterable_input)

コード例 #8

0

ファイルを表示

#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.helpers import freeze_support
freeze_support()

# instantiate and configure the worker pool
from pathos.pools import ProcessPool
pool = ProcessPool(nodes=4)

# Reference result computed serially. It must be a list: on Python 3 the
# builtin map() returns a one-shot iterator, which never compares equal to
# the lists produced below.
_result = list(map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))

# do a blocking map on the chosen function
result = pool.map(pow, [1, 2, 3, 4], [5, 6, 7, 8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
result = result_queue.get()
assert result == _result

コード例 #9

0

ファイルを表示

ファイル: run_job.py プロジェクト: brianlevis/more-cores

import pickle
import sys
import dill
from pathos.pools import ProcessPool

# Usage: run_job.py <id_number> <num_cores>
id_number, num_cores = sys.argv[1:3]

# NOTE(review): dill.load can execute arbitrary code while unpickling; only
# run this script on job files that come from a trusted source.
with open('data_{}.pkl'.format(id_number), 'rb') as fp:
    func, args_set = dill.load(fp)

# Transpose the per-call argument tuples into one sequence per positional
# parameter, which is the shape imap() expects. The parallel run below is
# equivalent to the serial:  [func(*args) for args in args_set]
arg_columns = list(zip(*args_set))

if arg_columns:
    pool = ProcessPool(nodes=int(num_cores))
    try:
        results = list(pool.imap(func, *arg_columns))
    finally:
        # release the worker processes even if a task raised
        pool.close()
        pool.join()
else:
    # Nothing to run; imap() cannot be called without any argument sequence.
    results = []

with open('results_{}.pkl'.format(id_number), 'wb') as fp:
    pickle.dump(results, fp)

コード例 #10

0

ファイルを表示

def parmap(f,
           X,
           nprocs=multiprocessing.cpu_count(),
           chunk_size=1,
           use_tqdm=False,
           **tqdm_kwargs):
    """
    Parallel equivalent of ``list(map(f, X))`` using a worker pool.

    Relies on the module-level globals ``force_serial``, ``proc_count`` and
    ``i`` (a call counter that is mixed into the per-task random seeds).

    Args:
        f: function applied to every element of X.
        X: sized collection of inputs.
        nprocs: number of worker processes requested (defaults to all cores);
            it is capped by the amount of work available.
        chunk_size: chunk size forwarded to the pool's map/imap.
        use_tqdm: show a progress bar for the parallel run.
        tqdm_kwargs: extra keyword arguments passed to tqdm.

    Returns:
        A list with the result of ``f`` for every element of X, in order.
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map

    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    if force_serial or nprocs == 1:  # we want it serial (maybe for profiling)
        return list(map(f, tqdm(X, smoothing=0, **tqdm_kwargs)))

    def _spawn_fun(item, func, c):
        # Runs in the worker: seed the RNGs per task, then apply func.
        import random, numpy
        random.seed(1554 + i + c)
        numpy.random.seed(42 + i + c)  # set random seeds
        try:
            return {"res": func(item)}
        except Exception:
            # Print the traceback inside the worker before propagating it.
            import traceback
            traceback.print_exc()
            raise  # re-raise exception

    old_proc_count = proc_count
    proc_count = nprocs
    try:  # try-catch hides bugs
        p = Pool(nprocs)
        p.restart(force=True)
        # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(_spawn_fun,
                                     X, [f] * len(X),
                                     range(len(X)),
                                     chunk_size=chunk_size),
                              total=len(X),
                              smoothing=0,
                              **tqdm_kwargs)
        else:
            retval_par = p.map(_spawn_fun,
                               X, [f] * len(X),
                               range(len(X)),
                               chunk_size=chunk_size)

        # unwrap the per-task dicts to make it like the original map
        retval = [res_dict["res"] for res_dict in retval_par]

        p.terminate()
        i += 1
    except AssertionError as e:
        if str(e) == "daemonic processes are not allowed to have children":
            # can't have pool inside pool: fall back to a serial run, still
            # returning a list like every other path of this function
            retval = list(map(f, X))
        else:
            print("error message is: " + str(e))
            raise  # re-raise orig exception
    finally:
        # restore the global on every exit path, not only on success
        proc_count = old_proc_count
    return retval

コード例 #11

0

ファイルを表示

def parmap(f: Callable,
           X: List[object],
           nprocs=multiprocessing.cpu_count(),
           force_parallel=False,
           chunk_size=1,
           use_tqdm=False,
           keep_child_tqdm=True,
           **tqdm_kwargs) -> list:
    """
    Utility function for doing parallel calculations with multiprocessing.
    Splits the parameters into chunks (if wanted) and calls.
    Equivalent to list(map(func, params_iter))

    Relies on the module-level globals ``force_serial``, ``proc_count`` and ``i``.

    Args:
        f: The function we want to calculate for each element
        X: The parameters for the function (each element in a list)
        nprocs: The number of procs to use (defaults for all cores)
        force_parallel: Use the pool even when only one proc would be used
            (``force_serial`` still takes precedence)
        chunk_size: Optional, the chunk size for the workers to work on
        use_tqdm: Whether to use tqdm (default to False)
        keep_child_tqdm: Forwarded unchanged to the spawn function with every task
        tqdm_kwargs: kwargs passed to tqdm

    Returns:
        The list of results after applying func to each element

    Has problems with using self.___ as variables in f (causes self to be pickled)
    """
    global proc_count, i

    if len(X) == 0:
        return []  # like map
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1

    nprocs = int(max(1, min(nprocs, len(X) / chunk_size)))  # at least 1
    if len(X) < nprocs:
        if nprocs != multiprocessing.cpu_count():
            print("parmap too much procs")
        nprocs = len(X)  # too much procs

    # we want it serial (maybe for profiling); decided before any task
    # arguments are built because the serial path does not need them
    if (nprocs == 1 and not force_parallel) or force_serial:
        return list(map(f, tqdm(X, disable=not use_tqdm, **tqdm_kwargs)))

    args = zip(X, [f] * len(X), range(len(X)), [keep_child_tqdm] * len(X))
    if chunk_size > 1:
        args = list(chunk_iterator(args, chunk_size))
        s_fun = _chunk_spawn_fun  # spawn fun
        n_tasks = len(args)  # exact number of chunks, incl. a partial last one
    else:
        s_fun = _spawn_fun  # spawn fun
        n_tasks = len(X)

    old_proc_count = proc_count
    proc_count = nprocs
    try:  # try-catch hides bugs
        p = Pool(nprocs)
        p.restart(force=True)
        # can throw if current proc is daemon
        if use_tqdm:
            retval_par = tqdm(p.imap(lambda arg: s_fun(arg), args),
                              total=n_tasks,
                              **tqdm_kwargs)
        else:
            retval_par = p.map(lambda arg: s_fun(arg), args)

        retval = list(retval_par)  # make it like the original map
        if chunk_size > 1:
            retval = flatten(retval)

        p.terminate()
        i += 1
    except AssertionError:
        # Raised e.g. for "daemonic processes are not allowed to have
        # children": can't have pool inside pool, so run serially instead.
        retval = list(map(f,
                          tqdm(X, disable=not use_tqdm,
                               **tqdm_kwargs)))
    finally:
        # restore the global on every exit path, not only on success
        proc_count = old_proc_count
    return retval

コード例 #12

0

ファイルを表示

    if args.problem == "perf":
        print("Starting performance simulation:")
        print("Pmax = {0}".format(Pmax.tolist()))
        var = Pmax
        sim_call = perf_call
    elif args.problem == "outdated":
        print("Starting outdated CSI simulation:")
        print("Update frequencies = {0}".format(update_freq.tolist()))
        var = update_freq
        sim_call = outdated_call

    inputs = gen_inputs(var, num_iter)

    pool = ProcessPool(args.num_procs)
    results = []
    for r in tqdm(pool.imap(sim_call, *zip(*inputs)),
                  total=len(var) * num_iter):
        results.append(r)
    pool.close()
    sum_rate, jain_idx = reduce_outputs(var, num_iter, results)

    print("Job done!")

    date = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    dir_name = "-".join([algorithm, date, solver, args.problem, "R" + str(R)])
    if args.problem == "outdated":
        dir_name += "-doppler" + str(doppler)

    if not data_path.exists():
        print("Creating data directory")
        data_path.mkdir()

コード例 #13

0

ファイルを表示

ファイル: test_mp.py プロジェクト: daodaoliang/pathos

#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.helpers import freeze_support
freeze_support()

# instantiate and configure the worker pool
from pathos.pools import ProcessPool
pool = ProcessPool(nodes=4)

# Serial reference result, materialised as a list: on Python 3 the builtin
# map() returns a one-shot iterator that never compares equal to a list, so
# every assertion below would fail without the list() call.
_result = list(map(pow, [1,2,3,4], [5,6,7,8]))

# do a blocking map on the chosen function
result = pool.map(pow, [1,2,3,4], [5,6,7,8])
assert result == _result

# do a non-blocking map, then extract the result from the iterator
result_iter = pool.imap(pow, [1,2,3,4], [5,6,7,8])
result = list(result_iter)
assert result == _result

# do an asynchronous map, then get the results
result_queue = pool.amap(pow, [1,2,3,4], [5,6,7,8])
result = result_queue.get()
assert result == _result