def imap(self, iterable, chunksize=1):
    """Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`."""
    # Single-process pools skip the task-queue machinery entirely.
    if self.processes == 1:
        return self.imap_trivial(iterable)
    # Derived from super().imap, but constructs an IMapIteratorLocal
    # (with a callback) instead of the stock IMapIterator.
    assert self._state == RUN
    if chunksize == 1:
        result = IMapIteratorLocal(self.callback, self)
        jobs = ((result._job, idx, _internal_worker, (item,), {})
                for idx, item in enumerate(iterable))
        self._taskqueue.put((jobs, result._set_length))
        return result
    assert chunksize > 1
    batches = Pool._get_tasks(_internal_worker, iterable, chunksize)
    result = IMapIteratorLocal(self.callback, self)
    jobs = ((result._job, idx, mapstar, (batch,), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, result._set_length))
    # Flatten the per-batch results back into a single stream.
    return (item for batch in result for item in batch)
def imap_unordered(self, func, iterable, second_argument, chunksize=1):
    """Like the `imap()` method, but the ordering of results is arbitrary.

    `second_argument` is passed as an extra positional argument to every
    invocation of `func`.
    """
    assert self._state == RUN
    if chunksize == 1:
        result = IMapUnorderedIterator(self._cache)
        jobs = ((result._job, idx, func, (item, second_argument), {})
                for idx, item in enumerate(iterable))
        self._taskqueue.put((jobs, result._set_length))
        return result
    # The assert runs before the iterator registers itself in self._cache.
    assert chunksize > 1
    batches = Pool._get_tasks(func, iterable, chunksize)
    result = IMapUnorderedIterator(self._cache)
    jobs = ((result._job, idx, mapstar, (batch, second_argument), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, result._set_length))
    # Flatten the per-batch results back into a single stream.
    return (item for batch in result for item in batch)
def imap(self, func, iterable, chunksize=1):
    """Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`."""
    assert self._state == RUN
    if chunksize == 1:
        result = IMapIterator(self._cache)
        jobs = ((result._job, idx, func, (item,), {})
                for idx, item in enumerate(iterable))
        self._taskqueue.put((jobs, result._set_length))
        return result
    # The assert runs before the iterator registers itself in self._cache.
    assert chunksize > 1
    batches = Pool._get_tasks(func, iterable, chunksize)
    result = IMapIterator(self._cache)
    jobs = ((result._job, idx, mapstar, (batch,), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, result._set_length))
    # Flatten the per-batch results back into a single stream.
    return (item for batch in result for item in batch)
def imap_unordered(self, func, iterable, second_argument, chunksize=1):
    """Like the `imap()` method, but the ordering of results is arbitrary.

    `second_argument` is passed as an extra positional argument to every
    invocation of `func`.
    """
    assert self._state == RUN
    if chunksize == 1:
        result = IMapUnorderedIterator(self._cache)
        jobs = ((result._job, idx, func, (item, second_argument), {})
                for idx, item in enumerate(iterable))
        self._taskqueue.put((jobs, result._set_length))
        return result
    # The assert runs before the iterator registers itself in self._cache.
    assert chunksize > 1
    batches = Pool._get_tasks(func, iterable, chunksize)
    result = IMapUnorderedIterator(self._cache)
    jobs = ((result._job, idx, mapstar, (batch, second_argument), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, result._set_length))
    # Flatten the per-batch results back into a single stream.
    return (item for batch in result for item in batch)
def map_async(self, func, iterable, chunksize=None, callback=None):
    """Asynchronous equivalent of the `map()` builtin."""
    assert self._state == RUN
    # The chunking logic needs a length, so materialize lazy iterables.
    if not hasattr(iterable, '__len__'):
        iterable = list(iterable)
    n_items = len(iterable)
    if chunksize is None:
        # Aim for roughly four batches per worker, rounding up.
        chunksize, remainder = divmod(n_items, len(self._pool) * 4)
        if remainder:
            chunksize += 1
    if n_items == 0:
        chunksize = 0
    batches = Pool._get_tasks(func, iterable, chunksize)
    result = MapResult(self._cache, chunksize, n_items, callback)
    jobs = ((result._job, idx, mapstar, (batch,), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, None))
    return result
def map_async(self, func, iterable, chunksize=None, callback=None):
    """Asynchronous equivalent of the `map()` builtin."""
    assert self._state == RUN
    # The chunking logic needs a length, so materialize lazy iterables.
    if not hasattr(iterable, '__len__'):
        iterable = list(iterable)
    n_items = len(iterable)
    if chunksize is None:
        # Aim for roughly four batches per worker, rounding up.
        chunksize, remainder = divmod(n_items, len(self._pool) * 4)
        if remainder:
            chunksize += 1
    if n_items == 0:
        chunksize = 0
    batches = Pool._get_tasks(func, iterable, chunksize)
    # Uses SafeMapResult rather than the stock MapResult.
    result = SafeMapResult(self._cache, chunksize, n_items, callback)
    jobs = ((result._job, idx, mapstar, (batch,), {})
            for idx, batch in enumerate(batches))
    self._taskqueue.put((jobs, None))
    return result
def imap_unordered(self, func, iterable, chunksize):
    """Customized version of imap_unordered.

    Sends each chunk to `func` as a whole, instead of iterating inside
    every worker process and dispatching items one by one.
    Original: https://hg.python.org/cpython/file/tip/Lib/multiprocessing/pool.py#l271

    Other options that were tried:
    - map_async: calls list(iterable), loading all the data into RAM
    - apply_async: requires chunking by hand
    """
    assert self._state == RUN
    batches = Pool._get_tasks(func, iterable, chunksize)
    result = IMapUnorderedIterator(self._cache)
    # _get_tasks yields (func, chunk) pairs; the func slot is dropped and
    # the whole chunk becomes the argument tuple of a single job.
    jobs = ((result._job, idx, func, chunk, {})
            for idx, (_, chunk) in enumerate(batches))
    self._taskqueue.put((jobs, result._set_length))
    return result