class LocalMultiprocessEngine(ConcurrentEngine):
  """An engine that runs tasks locally and in parallel when possible using a process pool.

  This implementation stores all process inputs in Storage and executes cache lookups before
  submitting a task to another process. This use of Storage means that only a Key for the
  Runnable is sent (directly) across process boundaries, and avoids sending the same data
  across process boundaries repeatedly.
  """

  def __init__(self, scheduler, storage=None, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, debug)
    self._processed_queue = Queue()
    self.node_builder = scheduler.node_builder
    process_initializer = functools.partial(_process_initializer, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug
    self._pool.start()

  def _submit(self, step_id, runnable_key, is_cacheable):
    entry = (step_id, runnable_key, is_cacheable)
    if self._debug:
      _try_pickle(entry)
    self._pool.submit(entry)

  def close(self):
    self._pool.close()

  def _submit_until(self, pending_submission, in_flight, n):
    """Submit pending while there's capacity, and more than `n` items pending_submission."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    submitted = 0
    completed = []
    for _ in range(to_submit):
      step, runnable = pending_submission.popitem(last=False)
      if step in in_flight:
        raise InFlightException('{} is already in_flight!'.format(step))
      # We eagerly compute a key for the Runnable, because it allows us to avoid sending the same
      # data across process boundaries repeatedly.
      runnable_key = self._storage.put_state(runnable)
      is_cacheable = self._use_cache and step.node.is_cacheable
      result = self._cache.get_for_key(runnable_key) if is_cacheable else None
      if result is not None:
        # Skip in_flight on cache hit.
        completed.append((step, result))
      else:
        step_id = id(step)
        in_flight[step_id] = step
        self._submit(step_id, runnable_key, is_cacheable)
        submitted += 1
    return submitted, completed

  def _await_one(self, in_flight):
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise InFlightException('Awaited an empty pool!')
    step_id, result_key = self._pool.await_one_result()
    if isinstance(result_key, Exception):
      raise result_key
    if step_id not in in_flight:
      raise InFlightException('Received unexpected work from the Executor: {} vs {}'.format(
        step_id, in_flight.keys()))
    return in_flight.pop(step_id), self._storage.get_state(result_key)
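# A minimal sketch of the Storage round trip described in the class docstring above, not part of
# the engine itself. It assumes `Storage.create` also accepts `in_memory=True` (the comment in
# `__init__` implies an in-memory default exists elsewhere) and that `put_state` accepts any
# picklable value; the tuple used here is purely illustrative. Only the returned Key needs to
# cross a process boundary: a worker initialized with the same Storage resolves it back into the
# original Runnable.
def _storage_round_trip_sketch():
  storage = Storage.create(in_memory=True)
  runnable = ('illustrative', 'runnable')
  runnable_key = storage.put_state(runnable)  # Parent process: intern the value, keep only the Key.
  resolved = storage.get_state(runnable_key)  # Worker process: resolve the Key via shared Storage.
  assert resolved == runnable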
class LocalMultiprocessEngine(Engine):
  """An engine that runs tasks locally and in parallel when possible using a process pool."""

  def __init__(self, scheduler, storage, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, self._maybe_cache_put, debug)
    node_builder = scheduler.node_builder()
    process_initializer = functools.partial(_process_initializer, node_builder, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug

  def _submit(self, step):
    _try_pickle(step)
    self._pool.submit(step)

  def start(self):
    self._pool.start()

  def reduce(self, execution_request):
    # Step instances which have not been submitted yet.
    # TODO: Scheduler now only sends work once, so a deque should be fine here.
    pending_submission = OrderedSet()
    # Dict from step id to a Promise for Steps that have been submitted.
    in_flight = dict()

    def submit_until(n):
      """Submit pending while there's capacity, and more than `n` items pending_submission."""
      to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
      submitted = 0
      for _ in range(to_submit):
        step, promise = pending_submission.pop(last=False)
        if step.step_id in in_flight:
          raise Exception('{} is already in_flight!'.format(step))
        step = self._storage.key_for_request(step)
        result = self._maybe_cache_get(step)
        if result is not None:
          # Skip in_flight on cache hit.
          promise.success(result)
        else:
          in_flight[step.step_id] = promise
          self._submit(step)
          submitted += 1
      return submitted

    def await_one():
      """Await one completed step, and remove it from in_flight."""
      if not in_flight:
        raise Exception('Awaited an empty pool!')
      step_id, result = self._pool.await_one_result()
      if isinstance(result, Exception):
        raise result
      result = self._storage.resolve_result(result)
      if step_id not in in_flight:
        raise Exception('Received unexpected work from the Executor: {} vs {}'.format(
          step_id, in_flight.keys()))
      in_flight.pop(step_id).success(result)

    # The main reduction loop:
    # 1. Whenever we don't have enough work to saturate the pool, request more.
    # 2. Whenever the pool is not saturated, submit currently pending work.
    for step_batch in self._scheduler.schedule(execution_request):
      if not step_batch:
        # A batch should only be empty if all dependency work is currently blocked/running.
        if not in_flight and not pending_submission:
          raise Exception('Scheduler provided an empty batch while no work is in progress!')
      else:
        # Submit and wait for work for as long as we're able to keep the pool saturated.
        pending_submission.update(step_batch)
        while submit_until(self._pool_size) > 0:
          await_one()

      # Await at least one entry per scheduling loop.
      submit_until(0)
      if in_flight:
        await_one()

    # Consume all steps.
    while pending_submission or in_flight:
      submit_until(self._pool_size)
      await_one()

  def close(self):
    super(LocalMultiprocessEngine, self).close()
    self._pool.close()
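# A minimal usage sketch for the Engine-based variant above, not a prescribed API: it assumes a
# `scheduler` (a pants.engine.scheduler.LocalScheduler) and an `execution_request` produced by
# that scheduler already exist; neither construction is shown in this file.
def _engine_usage_sketch(scheduler, execution_request):
  engine = LocalMultiprocessEngine(scheduler, Storage.create(in_memory=False))
  engine.start()
  try:
    # Drives scheduler.schedule(execution_request) until all pending and in-flight steps finish.
    engine.reduce(execution_request)
  finally:
    engine.close()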
class LocalMultiprocessEngine(ConcurrentEngine):
  """An engine that runs tasks locally and in parallel when possible using a process pool."""

  def __init__(self, scheduler, storage, cache=None, pool_size=None, debug=True):
    """
    :param scheduler: The local scheduler for creating execution graphs.
    :type scheduler: :class:`pants.engine.scheduler.LocalScheduler`
    :param storage: The storage instance for serializables keyed by their hashes.
    :type storage: :class:`pants.engine.storage.Storage`
    :param cache: The cache instance for storing execution results; by default it uses the same
      Storage instance if not specified.
    :type cache: :class:`pants.engine.storage.Cache`
    :param int pool_size: The number of worker processes to use; by default 2 processes per core
      will be used.
    :param bool debug: `True` to turn on pickling error debug mode (slower); True by default.
    """
    # This is the only place where non-in-memory storage is needed; create one if not specified.
    storage = storage or Storage.create(in_memory=False)

    super(LocalMultiprocessEngine, self).__init__(scheduler, storage, cache)
    self._pool_size = pool_size if pool_size and pool_size > 0 else 2 * multiprocessing.cpu_count()

    execute_step = functools.partial(_execute_step, self._maybe_cache_put, debug)
    self._processed_queue = Queue()
    self.node_builder = scheduler.node_builder()
    process_initializer = functools.partial(self._initializer, self.node_builder, self._storage)
    self._pool = StatefulPool(self._pool_size, process_initializer, execute_step)
    self._debug = debug

  @property
  def _initializer(self):
    return _process_initializer

  def _submit(self, step):
    _try_pickle(step)
    self._pool.submit(step)

  def start(self):
    self._pool.start()

  def close(self):
    self._pool.close()

  def _is_async_node(self, node):
    return True

  def _submit_until(self, pending_submission, in_flight, n):
    """Submit pending while there's capacity, and more than `n` items pending_submission."""
    to_submit = min(len(pending_submission) - n, self._pool_size - len(in_flight))
    submitted = 0
    for _ in range(to_submit):
      step, promise = pending_submission.pop(last=False)
      if self._is_async_node(step.node):
        if step.step_id in in_flight:
          raise InFlightException('{} is already in_flight!'.format(step))
        step = self._storage.key_for_request(step)
        result = self._maybe_cache_get(step)
        if result is not None:
          # Skip in_flight on cache hit.
          promise.success(result)
        else:
          in_flight[step.step_id] = promise
          self._submit(step)
          submitted += 1
      else:
        # Synchronous fallback: run the step inline in this process, consulting the cache first.
        keyed_request = self._storage.key_for_request(step)
        result = self._maybe_cache_get(keyed_request)
        if result is None:
          result = step(self.node_builder)
          self._maybe_cache_put(keyed_request, result)
        promise.success(result)
    return submitted

  def _await_one(self, in_flight):
    """Await one completed step, and remove it from in_flight."""
    if not in_flight:
      raise InFlightException('Awaited an empty pool!')
    step_id, result = self._pool.await_one_result()
    if isinstance(result, Exception):
      raise result
    result = self._storage.resolve_result(result)
    if step_id not in in_flight:
      raise InFlightException('Received unexpected work from the Executor: {} vs {}'.format(
        step_id, in_flight.keys()))
    in_flight.pop(step_id).success(result)
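# An illustrative, hypothetical subclass: because `_submit_until` above falls back to running a
# step inline in the parent process whenever `_is_async_node` returns False, overriding that hook
# is enough to keep selected work out of the worker pool. Forcing it to False everywhere yields a
# serial, in-process engine.
class SerialFallbackEngine(LocalMultiprocessEngine):
  """Runs every step inline via the synchronous branch of `_submit_until`."""

  def _is_async_node(self, node):
    return False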