class LocalMultiprocessEngine(ConcurrentEngine):
  """An engine that runs tasks locally and in parallel when possible using a process pool.

  This implementation stores all process inputs in Storage and executes cache lookups before
  submitting a task to another process. This use of Storage means that only a Key for the
  Runnable is sent (directly) across process boundaries, and avoids sending the same data
  across process boundaries repeatedly.
  """

  def __init__(self, scheduler, storage=None, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, debug)
    self._processed_queue = Queue()
    self.node_builder = scheduler.node_builder
    process_initializer = functools.partial(_process_initializer, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug
    self._pool.start()

  def _submit(self, step_id, runnable_key, is_cacheable):
    entry = (step_id, runnable_key, is_cacheable)
    if self._debug:
      _try_pickle(entry)
    self._pool.submit(entry)

  def close(self):
    self._pool.close()

  def _submit_until(self, pending_submission, in_flight, n):
    """Submit pending while there's capacity, and more than `n` items pending_submission."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    submitted = 0
    completed = []
    for _ in range(to_submit):
      step, runnable = pending_submission.popitem(last=False)
      if step in in_flight:
        raise InFlightException('{} is already in_flight!'.format(step))
      # We eagerly compute a key for the Runnable, because it allows us to avoid sending the same
      # data across process boundaries repeatedly.
      runnable_key = self._storage.put_state(runnable)
      is_cacheable = self._use_cache and step.node.is_cacheable
      result = self._cache.get_for_key(runnable_key) if is_cacheable else None
      if result is not None:
        # Skip in_flight on cache hit.
        completed.append((step, result))
      else:
        step_id = id(step)
        in_flight[step_id] = step
        self._submit(step_id, runnable_key, is_cacheable)
        submitted += 1
    return submitted, completed

  def _await_one(self, in_flight):
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise InFlightException('Awaited an empty pool!')
    step_id, result_key = self._pool.await_one_result()
    if isinstance(result_key, Exception):
      raise result_key
    if step_id not in in_flight:
      raise InFlightException('Received unexpected work from the Executor: {} vs {}'.format(
        step_id, in_flight.keys()))
    return in_flight.pop(step_id), self._storage.get_state(result_key)
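# A minimal sketch of the Storage round trip described in the class docstring above, not part of
# the engine itself. It assumes `Storage.create` also accepts `in_memory=True` (the comment in
# `__init__` implies an in-memory default exists elsewhere) and that `put_state` accepts any
# picklable value; the tuple used here is purely illustrative. Only the returned Key needs to
# cross a process boundary: a worker initialized with the same Storage resolves it back into the
# original Runnable.
def _storage_round_trip_sketch():
  storage = Storage.create(in_memory=True)
  runnable = ('illustrative', 'runnable')
  runnable_key = storage.put_state(runnable)  # Parent process: intern the value, keep only the Key.
  resolved = storage.get_state(runnable_key)  # Worker process: resolve the Key via shared Storage.
  assert resolved == runnable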
class LocalMultiprocessEngine(Engine):
  """An engine that runs tasks locally and in parallel when possible using a process pool."""

  def __init__(self, scheduler, storage, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, self._maybe_cache_put, debug)
    node_builder = scheduler.node_builder()
    process_initializer = functools.partial(_process_initializer, node_builder, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug

  def _submit(self, step):
    _try_pickle(step)
    self._pool.submit(step)

  def start(self):
    self._pool.start()

  def reduce(self, execution_request):
    # Step instances which have not been submitted yet.
    # TODO: Scheduler now only sends work once, so a deque should be fine here.
    pending_submission = OrderedSet()
    # Dict from step id to a Promise for Steps that have been submitted.
    in_flight = dict()

    def submit_until(n):
      """Submit pending while there's capacity, and more than `n` items pending_submission."""
      to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
      submitted = 0
      for _ in range(to_submit):
        step, promise = pending_submission.pop(last=False)
        if step.step_id in in_flight:
          raise Exception('{} is already in_flight!'.format(step))
        step = self._storage.key_for_request(step)
        result = self._maybe_cache_get(step)
        if result is not None:
          # Skip in_flight on cache hit.
          promise.success(result)
        else:
          in_flight[step.step_id] = promise
          self._submit(step)
          submitted += 1
      return submitted

    def await_one():
      """Await one completed step, and remove it from in_flight."""
      if not in_flight:
        raise Exception('Awaited an empty pool!')
      step_id, result = self._pool.await_one_result()
      if isinstance(result, Exception):
        raise result
      result = self._storage.resolve_result(result)
      if step_id not in in_flight:
        raise Exception('Received unexpected work from the Executor: {} vs {}'.format(
          step_id, in_flight.keys()))
      in_flight.pop(step_id).success(result)

    # The main reduction loop:
    # 1. Whenever we don't have enough work to saturate the pool, request more.
    # 2. Whenever the pool is not saturated, submit currently pending work.
    for step_batch in self._scheduler.schedule(execution_request):
      if not step_batch:
        # A batch should only be empty if all dependency work is currently blocked/running.
        if not in_flight and not pending_submission:
          raise Exception('Scheduler provided an empty batch while no work is in progress!')
      else:
        # Submit and wait for work for as long as we're able to keep the pool saturated.
        pending_submission.update(step_batch)
        while submit_until(self._pool_size) > 0:
          await_one()

      # Await at least one entry per scheduling loop.
      submit_until(0)
      if in_flight:
        await_one()

    # Consume all steps.
    while pending_submission or in_flight:
      submit_until(self._pool_size)
      await_one()

  def close(self):
    super(LocalMultiprocessEngine, self).close()
    self._pool.close()
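# A minimal usage sketch for the Engine-based variant above, not a prescribed API: it assumes a
# `scheduler` (a pants.engine.scheduler.LocalScheduler) and an `execution_request` produced by
# that scheduler already exist; neither construction is shown in this file.
def _engine_usage_sketch(scheduler, execution_request):
  engine = LocalMultiprocessEngine(scheduler, Storage.create(in_memory=False))
  engine.start()
  try:
    # Drives scheduler.schedule(execution_request) until all pending and in-flight steps finish.
    engine.reduce(execution_request)
  finally:
    engine.close()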
class LocalMultiprocessEngine(ConcurrentEngine):
  """An engine that runs tasks locally and in parallel when possible using a process pool."""

  def __init__(self, scheduler, storage, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, self._maybe_cache_put, debug)
    self._processed_queue = Queue()
    self.node_builder = scheduler.node_builder()
    process_initializer = functools.partial(self._initializer, self.node_builder, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug

  @property
  def _initializer(self):
    return _process_initializer

  def _submit(self, step):
    _try_pickle(step)
    self._pool.submit(step)

  def start(self):
    self._pool.start()

  def close(self):
    self._pool.close()

  def _is_async_node(self, node):
    return True

  def _submit_until(self, pending_submission, in_flight, n):
    """Submit pending while there's capacity, and more than `n` items pending_submission."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    submitted = 0
    for _ in range(to_submit):
      step, promise = pending_submission.pop(last=False)
      if self._is_async_node(step.node):
        if step.step_id in in_flight:
          raise InFlightException('{} is already in_flight!'.format(step))
        step = self._storage.key_for_request(step)
        result = self._maybe_cache_get(step)
        if result is not None:
          # Skip in_flight on cache hit.
          promise.success(result)
        else:
          in_flight[step.step_id] = promise
          self._submit(step)
          submitted += 1
      else:
        # Synchronous fallback: run the step inline in this process, consulting the cache first.
        keyed_request = self._storage.key_for_request(step)
        result = self._maybe_cache_get(keyed_request)
        if result is None:
          result = step(self.node_builder)
          self._maybe_cache_put(keyed_request, result)
        promise.success(result)
    return submitted

  def _await_one(self, in_flight):
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise InFlightException('Awaited an empty pool!')
    step_id, result = self._pool.await_one_result()
    if isinstance(result, Exception):
      raise result
    result = self._storage.resolve_result(result)
    if step_id not in in_flight:
      raise InFlightException('Received unexpected work from the Executor: {} vs {}'.format(
        step_id, in_flight.keys()))
    in_flight.pop(step_id).success(result)
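# An illustrative, hypothetical subclass: because `_submit_until` above falls back to running a
# step inline in the parent process whenever `_is_async_node` returns False, overriding that hook
# is enough to keep selected work out of the worker pool. Forcing it to False everywhere yields a
# serial, in-process engine.
class SerialFallbackEngine(LocalMultiprocessEngine):
  """Runs every step inline via the synchronous branch of `_submit_until`."""

  def _is_async_node(self, node):
    return False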