Example #1
0
    def imap(self, iterable, chunksize=1):
        """
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        """
        # A single worker process needs no task-queue round trip; use the
        # trivial in-process implementation instead.
        if self.processes == 1:
            return self.imap_trivial(iterable)

        # Derived from super().imap, but using IMapIteratorLocal instead of
        # IMapIterator.
        assert self._state == RUN
        if chunksize == 1:
            it = IMapIteratorLocal(self.callback, self)
            jobs = ((it._job, idx, _internal_worker, (item, ), {})
                    for idx, item in enumerate(iterable))
            self._taskqueue.put((jobs, it._set_length))
            return it

        assert chunksize > 1
        batches = Pool._get_tasks(_internal_worker, iterable, chunksize)
        it = IMapIteratorLocal(self.callback, self)
        jobs = ((it._job, idx, mapstar, (batch, ), {})
                for idx, batch in enumerate(batches))
        self._taskqueue.put((jobs, it._set_length))
        # Flatten the per-chunk results back into a single stream.
        return (element for chunk in it for element in chunk)
Example #2
0
 def imap_unordered(self, func, iterable, second_argument, chunksize=1):
     '''
     Like `imap()` method but ordering of results is arbitrary
     '''
     assert self._state == RUN
     if chunksize == 1:
         it = IMapUnorderedIterator(self._cache)
         jobs = ((it._job, idx, func, (item, second_argument), {})
                 for idx, item in enumerate(iterable))
         self._taskqueue.put((jobs, it._set_length))
         return it

     assert chunksize > 1
     batches = Pool._get_tasks(func, iterable, chunksize)
     it = IMapUnorderedIterator(self._cache)
     jobs = ((it._job, idx, mapstar, (batch, second_argument), {})
             for idx, batch in enumerate(batches))
     self._taskqueue.put((jobs, it._set_length))
     # Flatten the per-chunk results back into a single stream.
     return (element for chunk in it for element in chunk)
Example #3
0
 def imap(self, func, iterable, chunksize=1):
     '''
     Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
     '''
     assert self._state == RUN
     if chunksize == 1:
         it = IMapIterator(self._cache)
         jobs = ((it._job, idx, func, (item,), {})
                 for idx, item in enumerate(iterable))
         self._taskqueue.put((jobs, it._set_length))
         return it

     assert chunksize > 1
     batches = Pool._get_tasks(func, iterable, chunksize)
     it = IMapIterator(self._cache)
     jobs = ((it._job, idx, mapstar, (batch,), {})
             for idx, batch in enumerate(batches))
     self._taskqueue.put((jobs, it._set_length))
     # Flatten the per-chunk results back into a single stream.
     return (element for chunk in it for element in chunk)
Example #4
0
 def imap_unordered(self, func, iterable, second_argument, chunksize=1):
     '''
     Like `imap()` method but ordering of results is arbitrary
     '''
     assert self._state == RUN
     if chunksize == 1:
         it = IMapUnorderedIterator(self._cache)
         jobs = ((it._job, idx, func, (item, second_argument), {})
                 for idx, item in enumerate(iterable))
         self._taskqueue.put((jobs, it._set_length))
         return it

     assert chunksize > 1
     batches = Pool._get_tasks(func, iterable, chunksize)
     it = IMapUnorderedIterator(self._cache)
     jobs = ((it._job, idx, mapstar, (batch, second_argument), {})
             for idx, batch in enumerate(batches))
     self._taskqueue.put((jobs, it._set_length))
     # Flatten the per-chunk results back into a single stream.
     return (element for chunk in it for element in chunk)
Example #5
0
    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        # len() is needed below, so materialize arbitrary iterables first.
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Aim for roughly four chunks per worker; round up on remainder.
            chunksize, remainder = divmod(len(iterable), len(self._pool) * 4)
            if remainder:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        jobs = ((result._job, idx, mapstar, (batch,), {})
                for idx, batch in enumerate(batches))
        self._taskqueue.put((jobs, None))
        return result
Example #6
0
    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        # len() is needed below, so materialize arbitrary iterables first.
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Aim for roughly four chunks per worker; round up on remainder.
            chunksize, remainder = divmod(len(iterable), len(self._pool) * 4)
            if remainder:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        batches = Pool._get_tasks(func, iterable, chunksize)
        result = SafeMapResult(self._cache, chunksize, len(iterable), callback)
        jobs = ((result._job, idx, mapstar, (batch, ), {})
                for idx, batch in enumerate(batches))
        self._taskqueue.put((jobs, None))
        return result
Example #7
0
    def imap_unordered(self, func, iterable, chunksize):
        """Customized version of imap_unordered.

        Directly send chunks to func, instead of iterating in each process and
        sending one by one.

        Original:
        https://hg.python.org/cpython/file/tip/Lib/multiprocessing/pool.py#l271

        Other tried options:
        - map_async: makes a list(iterable), so it loads all the data for each
          process into RAM
        - apply_async: needs manual chunking
        """
        assert self._state == RUN
        batches = Pool._get_tasks(func, iterable, chunksize)
        result = IMapUnorderedIterator(self._cache)
        # _get_tasks yields (func, chunk) pairs; discard the func slot and
        # pass the whole chunk as the argument tuple so `func` receives the
        # chunk directly rather than one item at a time.
        jobs = ((result._job, idx, func, chunk, {})
                for idx, (_unused, chunk) in enumerate(batches))
        self._taskqueue.put((jobs, result._set_length))
        return result