def benchmark(workers, memory, loopcount, matn):
    # One (loopcount, matn) task per worker.
    iterable = [(loopcount, matn) for i in range(workers)]

    exc = FunctionExecutor(runtime_memory=memory)
    start_time = time.time()
    worker_futures = exc.map(compute_flops, iterable)
    results = exc.get_result()
    end_time = time.time()

    worker_stats = [f.stats for f in worker_futures]
    total_time = end_time - start_time
    print("Total time:", round(total_time, 3))

    # Each worker performs `loopcount` multiplications of two matn x matn
    # matrices; one such multiplication costs ~2 * matn**3 FLOPs.
    est_flops = workers * 2 * loopcount * matn ** 3
    print('Estimated GFLOPS:', round(est_flops / 1e9 / total_time, 4))

    res = {'start_time': start_time,
           'total_time': total_time,
           'est_flops': est_flops,
           'worker_stats': worker_stats,
           'results': results}
    return res


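# Usage sketch for benchmark(). Illustrative only, not part of the benchmark
# itself: it assumes compute_flops(loopcount, matn) is defined in this module
# and that a default FunctionExecutor backend is configured. The parameter
# values below are arbitrary examples.
def _benchmark_example():
    res = benchmark(workers=8, memory=1024, loopcount=4, matn=1024)
    # est_flops / total_time gives the aggregate FLOP rate across all workers
    print(round(res['est_flops'] / 1e9 / res['total_time'], 2), 'GFLOPS')

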
def knative_executor(config=None, runtime=None, runtime_memory=None,
                     workers=None, region=None, storage_backend=None,
                     storage_backend_region=None, rabbitmq_monitor=None,
                     remote_invoker=None, log_level=None):
    """
    Function executor for Knative
    """
    compute_backend = 'knative'
    return FunctionExecutor(config=config, runtime=runtime,
                            runtime_memory=runtime_memory, workers=workers,
                            compute_backend=compute_backend,
                            compute_backend_region=region,
                            storage_backend=storage_backend,
                            storage_backend_region=storage_backend_region,
                            rabbitmq_monitor=rabbitmq_monitor,
                            remote_invoker=remote_invoker,
                            log_level=log_level)


def function_executor(config=None, runtime=None, runtime_memory=None,
                      workers=None, compute_backend=None, region=None,
                      storage_backend=None, storage_backend_region=None,
                      rabbitmq_monitor=None, remote_invoker=None,
                      log_level=None):
    """
    Generic function executor
    """
    return FunctionExecutor(config=config, runtime=runtime,
                            runtime_memory=runtime_memory, workers=workers,
                            compute_backend=compute_backend,
                            compute_backend_region=region,
                            storage_backend=storage_backend,
                            storage_backend_region=storage_backend_region,
                            rabbitmq_monitor=rabbitmq_monitor,
                            remote_invoker=remote_invoker,
                            log_level=log_level)


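# Usage sketch for the executor factories above. Hedged example: it assumes a
# valid configuration for the chosen backend is available (e.g. from a config
# file), and `my_func` is an illustrative user function. knative_executor()
# is equivalent to function_executor(compute_backend='knative', ...).
def _executor_example():
    def my_func(x):
        return x * 2

    fexec = function_executor(compute_backend='knative', runtime_memory=256)
    fexec.call_async(my_func, 3)
    print(fexec.get_result())   # expected: 6

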
class Popen(object):
    method = 'cloud'

    def __init__(self, process_obj):
        util._flush_std_streams()
        self.returncode = None
        self._executor = FunctionExecutor()
        self._launch(process_obj)

    def duplicate_for_child(self, fd):
        return fd

    def poll(self, flag=ALWAYS):
        if self.returncode is None:
            self._executor.wait([self.sentinel], return_when=flag)
            if self.sentinel.ready or self.sentinel.done:
                self.returncode = 0
            if self.sentinel.error:
                self.returncode = 1
        return self.returncode

    def wait(self, timeout=None):
        if self.returncode is None:
            wait = self._executor.wait
            if not wait([self.sentinel], timeout=timeout):
                return None
            # This shouldn't block if wait() returned successfully.
            return self.poll(ALWAYS if timeout == 0.0 else ALL_COMPLETED)
        return self.returncode

    def terminate(self):
        if self.returncode is None:
            try:
                self.sentinel.cancel()
            except NotImplementedError:
                pass

    def _launch(self, process_obj):
        # Submit the target function asynchronously; the returned future
        # serves as the process sentinel that poll()/wait() inspect.
        fn_args = [*process_obj._args, *process_obj._kwargs]
        self.sentinel = self._executor.call_async(process_obj._target, fn_args)


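# Illustrative sketch of how this Popen is driven. The surrounding
# multiprocessing-style Process machinery (not shown in this section)
# constructs a process object carrying _target, _args and _kwargs and hands
# it to Popen; process_obj below is a stand-in for such an object.
def _popen_example(process_obj):
    popen = Popen(process_obj)             # submits process_obj._target remotely
    popen.wait()                           # blocks until the sentinel future completes
    print('exit code:', popen.returncode)  # 0 on success, 1 on error

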
def local_executor(config=None, workers=None, storage_backend=None,
                   storage_backend_region=None, rabbitmq_monitor=None,
                   log_level=None):
    """
    Localhost function executor
    """
    compute_backend = 'localhost'
    if storage_backend is None:
        storage_backend = 'localhost'
    return FunctionExecutor(config=config, workers=workers,
                            compute_backend=compute_backend,
                            storage_backend=storage_backend,
                            storage_backend_region=storage_backend_region,
                            rabbitmq_monitor=rabbitmq_monitor,
                            log_level=log_level)


def docker_executor(config=None, runtime=None, workers=None,
                    storage_backend=None, storage_backend_region=None,
                    rabbitmq_monitor=None, log_level=None):
    """
    Docker function executor
    """
    compute_backend = 'docker'
    if storage_backend is None:
        storage_backend = 'localhost'
    return FunctionExecutor(config=config, runtime=runtime, workers=workers,
                            compute_backend=compute_backend,
                            storage_backend=storage_backend,
                            storage_backend_region=storage_backend_region,
                            rabbitmq_monitor=rabbitmq_monitor,
                            remote_invoker=True,
                            log_level=log_level)


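# Usage sketch covering local_executor() and docker_executor(). Hedged: the
# worker count is an arbitrary example, and docker_executor additionally
# requires a running Docker daemon with a suitable runtime image; the
# docker variant is called the same way with a runtime name.
def _local_executor_example():
    def incr(x):
        return x + 1

    fexec = local_executor(workers=2)
    fexec.map(incr, [1, 2, 3])
    print(fexec.get_result())   # expected: [2, 3, 4]

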
class Pool(object):
    '''
    Class which supports an async version of applying functions to arguments.
    '''
    _wrap_exception = True

    def Process(self, *args, **kwds):
        return self._ctx.Process(*args, **kwds)

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None, context=None):
        self._ctx = context or get_context()
        #self._setup_queues()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        if processes is not None and processes < 1:
            raise ValueError("Number of processes must be at least 1")

        if processes is not None:
            if self._initargs:
                self._executor = FunctionExecutor(workers=processes,
                                                  **self._initargs)
            else:
                self._executor = FunctionExecutor(workers=processes)
            self._processes = processes
        else:
            if self._initargs:
                self._executor = FunctionExecutor(**self._initargs)
            else:
                self._executor = FunctionExecutor()
            self._processes = self._executor.invoker.workers

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

        self._pool = []
        #self._repopulate_pool()

        # self._worker_handler = threading.Thread(
        #     target=Pool._handle_workers,
        #     args=(self, )
        # )
        # self._worker_handler.daemon = True
        # self._worker_handler._state = RUN
        # self._worker_handler.start()
        #
        # self._task_handler = threading.Thread(
        #     target=Pool._handle_tasks,
        #     args=(self._taskqueue, self._quick_put, self._outqueue,
        #           self._pool, self._cache)
        # )
        # self._task_handler.daemon = True
        # self._task_handler._state = RUN
        # self._task_handler.start()
        #
        # self._result_handler = threading.Thread(
        #     target=Pool._handle_results,
        #     args=(self._outqueue, self._quick_get, self._cache)
        # )
        # self._result_handler.daemon = True
        # self._result_handler._state = RUN
        # self._result_handler.start()
        #
        # self._terminate = util.Finalize(
        #     self, self._terminate_pool,
        #     args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
        #           self._worker_handler, self._task_handler,
        #           self._result_handler, self._cache),
        #     exitpriority=15
        # )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to
        reaching their specified lifetime. Returns True if any workers were
        cleaned up.
        """
        cleaned = False
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                util.debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild,
                                   self._wrap_exception))
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            util.debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        self._inqueue = self._ctx.SimpleQueue()
        self._outqueue = self._ctx.SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `func(*args, **kwds)`.
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        '''
        return self._map_async(func, iterable, mapstar, chunksize).get()

    def starmap(self, func, iterable, chunksize=None):
        '''
        Like `map()` method but the elements of the `iterable` are expected
        to be iterables as well and will be unpacked as arguments. Hence
        `func` and (a, b) becomes func(a, b).
        '''
        return self._map_async(func, iterable, starmapstar, chunksize).get()

    def starmap_async(self, func, iterable, chunksize=None, callback=None,
                      error_callback=None):
        '''
        Asynchronous version of `starmap()` method.
        '''
        return self._map_async(func, iterable, starmapstar, chunksize,
                               callback, error_callback)

    def _guarded_task_generation(self, result_job, func, iterable):
        '''Provides a generator of tasks for imap and imap_unordered with
        appropriate handling for iterables which throw exceptions during
        iteration.'''
        try:
            i = -1
            for i, x in enumerate(iterable):
                yield (result_job, i, func, (x, ), {})
        except Exception as e:
            yield (result_job, i + 1, _helper_reraises_exception, (e, ), {})

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.
        '''
        if self._state != RUN:
            raise ValueError("Pool not running")
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put(
                (self._guarded_task_generation(result._job, func, iterable),
                 result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put(
                (self._guarded_task_generation(result._job, mapstar,
                                               task_batches),
                 result._set_length))
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary.
        '''
        if self._state != RUN:
            raise ValueError("Pool not running")
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put(
                (self._guarded_task_generation(result._job, func, iterable),
                 result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put(
                (self._guarded_task_generation(result._job, mapstar,
                                               task_batches),
                 result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None,
                    error_callback=None):
        '''
        Asynchronous version of `apply()` method.
        '''
        if self._state != RUN:
            raise ValueError("Pool not running")
        futures = self._executor.call_async(func, [*args, *kwds])
        result = ApplyResult(self._executor, [futures], callback,
                             error_callback)
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None,
                  error_callback=None):
        '''
        Asynchronous version of `map()` method.
        '''
        return self._map_async(func, iterable, mapstar, chunksize,
                               callback, error_callback)

    def _map_async(self, func, iterable, mapper, chunksize=None, callback=None,
                   error_callback=None):
        '''
        Helper function to implement map, starmap and their async counterparts.
        '''
        if self._state != RUN:
            raise ValueError("Pool not running")
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        futures = self._executor.map(func, iterable)
        result = MapResult(self._executor, futures, callback, error_callback)
        return result

    @staticmethod
    def _handle_workers(pool):
        thread = threading.current_thread()

        # Keep maintaining workers until the cache gets drained, unless the
        # pool is terminated.
        while thread._state == RUN or (pool._cache and
                                       thread._state != TERMINATE):
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        util.debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool, cache):
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            task = None
            try:
                # iterating taskseq cannot fail
                for task in taskseq:
                    if thread._state:
                        util.debug('task handler found thread._state != RUN')
                        break
                    try:
                        put(task)
                    except Exception as e:
                        job, idx = task[:2]
                        try:
                            cache[job]._set(idx, (False, e))
                        except KeyError:
                            pass
                else:
                    if set_length:
                        util.debug('doing set_length()')
                        idx = task[1] if task else -1
                        set_length(idx + 1)
                    continue
                break
            finally:
                task = taskseq = job = None
        else:
            util.debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            util.debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            util.debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except OSError:
            util.debug('task handler got OSError when sending sentinels')

        util.debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (OSError, EOFError):
                util.debug('result handler got EOFError/OSError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                util.debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                util.debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass
            task = job = obj = None

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (OSError, EOFError):
                util.debug('result handler got EOFError/OSError -- exiting')
                return

            if task is None:
                util.debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass
            task = job = obj = None

        if hasattr(outqueue, '_reader'):
            util.debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block. There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (OSError, EOFError):
                pass

        util.debug('result handler exiting: len(cache)=%s, thread._state=%s',
                   len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
            'pool objects cannot be passed between processes or pickled')

    def close(self):
        util.debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            #self._worker_handler._state = CLOSE

    def terminate(self):
        util.debug('terminating pool')
        self._state = TERMINATE
        #self._worker_handler._state = TERMINATE
        #self._terminate()
        self._executor.clean()

    def join(self):
        util.debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        #self._worker_handler.join()
        #self._task_handler.join()
        #self._result_handler.join()
        #for p in self._pool:
        #    p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        util.debug('removing tasks from inqueue until task handler finished')
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        # this is guaranteed to only be called once
        util.debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        util.debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our
        # back.
        util.debug('joining worker handler')
        if threading.current_thread() is not worker_handler:
            worker_handler.join()

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            util.debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        util.debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()

        util.debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            util.debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    util.debug('cleaning up worker %d' % p.pid)
                    p.join()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.terminate()
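

# Usage sketch for Pool. Hedged example: it exercises the standard
# multiprocessing.Pool surface that this class reimplements on top of
# FunctionExecutor; `double` is an illustrative user function, and a
# configured backend is assumed.
def _pool_example():
    def double(x):
        return x * 2

    with Pool(processes=4) as pool:                # __exit__ calls terminate()
        print(pool.map(double, range(8)))          # expected: [0, 2, ..., 14]
        async_res = pool.apply_async(double, (21, ))
        print(async_res.get())                     # expected: 42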