def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED,
         download_results=False, timeout=EXECUTION_TIMEOUT,
         THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
    """
    Wait for futures to complete and split them into done / not-done lists.

    When `fs` is empty the futures are gathered from `self.jobs`: jobs in
    `JobState.Running` (status-only wait) or jobs not yet `JobState.Done`
    (when downloading results). The selected jobs are marked `Ready` /
    `Done` as soon as their futures are collected, i.e. before the wait
    itself takes place.

    On unix systems the timeout is armed through `SIGALRM`. A
    `FunctionException` is reported and ends the process via `sys.exit()`.
    `TimeoutError` and `KeyboardInterrupt` are reported and the method still
    returns the partition computed afterwards.

    :param fs: A future or a list of futures. Default None (use job futures)
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default False (Only get statuses)
    :param timeout: Timeout, in seconds, of waiting for results.
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
        that have completed and `fs_notdone` is a list of futures that have
        not completed.
    :rtype: 2-tuple of list
    :raises Exception: if there are no futures to wait for.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm the FunctionException handler and `finally` layout.
    if not fs:
        fs = []
        for job_key in self.jobs:
            job_info = self.jobs[job_key]
            if download_results:
                selected = job_info['state'] != JobState.Done
                next_state = JobState.Done
            else:
                selected = job_info['state'] == JobState.Running
                next_state = JobState.Ready
            if selected:
                fs.extend(job_info['futures'])
                job_info['state'] = next_state

    # A single future is accepted too: wrap it into a list
    futures = fs if type(fs) == list else [fs]

    if not futures:
        raise Exception(
            'You must run the call_async(), map() or map_reduce(), or provide'
            ' a list of futures before calling the monitor()/get_result() method'
        )

    def _finished(future):
        # "done" means results downloaded; "ready" means status available
        if download_results:
            return future.done
        return future.ready or future.done

    def _unfinished_call_ids():
        return [(f.job_id, f.call_id) for f in futures if not _finished(f)]

    if download_results:
        template = 'ExecutorID {} - Getting results...'
    else:
        template = 'ExecutorID {} - Waiting for functions to complete...'
    msg = template.format(self.executor_id)
    logger.info(msg)

    executor_running = self._state == FunctionExecutor.State.Running
    if not self.log_level and executor_running:
        print(msg)

    if is_unix_system():
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

    pbar = None
    if not self.is_remote_cluster and executor_running and not self.log_level:
        from tqdm.auto import tqdm
        total = len(futures)
        if is_notebook():
            pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                        total=total)  # ncols=800
        else:
            print()
            pbar = tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                        total=total, disable=False)

    # Keyword arguments shared by both monitoring back-ends
    shared_kwargs = dict(download_results=download_results,
                         throw_except=throw_except,
                         return_when=return_when,
                         pbar=pbar,
                         THREADPOOL_SIZE=THREADPOOL_SIZE)

    try:
        if self.rabbitmq_monitor:
            logger.info('Using RabbitMQ to monitor function activations')
            wait_rabbitmq(futures, self.internal_storage,
                          rabbit_amqp_url=self.rabbit_amqp_url,
                          **shared_kwargs)
        else:
            wait_storage(futures, self.internal_storage,
                         WAIT_DUR_SEC=WAIT_DUR_SEC,
                         **shared_kwargs)

    except FunctionException as e:
        # Report the remote failure and terminate the process
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
        logger.info(e.msg)
        if not self.log_level:
            if not is_notebook():
                print()
            print(e.msg)
        if e.exc_msg:
            logger.info('Exception: ' + e.exc_msg)
            if not self.log_level:
                print('--> Exception: ' + e.exc_msg)
        else:
            print()
            traceback.print_exception(*e.exception)
        sys.exit()

    except TimeoutError:
        pending_ids = _unfinished_call_ids()
        msg = (
            'ExecutorID {} - Raised timeout of {} seconds waiting for results - Total Activations not done: {}'
            .format(self.executor_id, timeout, len(pending_ids)))
        self._state = FunctionExecutor.State.Error

    except KeyboardInterrupt:
        pending_ids = _unfinished_call_ids()
        msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
               .format(self.executor_id, len(pending_ids)))
        self._state = FunctionExecutor.State.Error

    except Exception as e:
        if not self.is_remote_cluster:
            self.clean()
        raise e

    finally:
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
            if not is_notebook():
                print()
        if self._state == FunctionExecutor.State.Error:
            logger.debug(msg)
            if not self.log_level:
                print(msg)
        if download_results and self.data_cleaner and not self.is_remote_cluster:
            self.clean()

    fs_done = [f for f in futures if _finished(f)]
    fs_notdone = [f for f in futures if not _finished(f)]
    if download_results:
        self._state = FunctionExecutor.State.Done
    else:
        self._state = FunctionExecutor.State.Ready

    return fs_done, fs_notdone
def monitor(self, futures=None, throw_except=True, return_when=ALL_COMPLETED,
            download_results=False, timeout=wrenconfig.RUNTIME_TIMEOUT,
            THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
    """
    Wait for the given futures to complete and report which ones finished.

    Falls back to `self.futures` when `futures` is empty. On unix systems
    the timeout is armed through `SIGALRM`. A timeout or a keyboard
    interrupt is reported (logged, and printed when `self.log_level` is
    unset) and the method still returns the partition computed afterwards.
    The executor state is always set to `ExecutorState.ready` before
    returning.

    :param futures: A future or a list of futures. Default None
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default False (Only download statuses)
    :param timeout: Timeout, in seconds, of waiting for results.
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_dones, fs_notdones)` where `fs_dones` is a list of futures
        that have completed and `fs_notdones` is a list of futures that have
        not completed.
    :rtype: 2-tuple of lists
    :raises Exception: if there are no futures to monitor.

    Usage
      >>> import pywren_ibm_cloud as pywren
      >>> pw = pywren.ibm_cf_executor()
      >>> pw.map(foo, data_list)
      >>> dones, not_dones = pw.monitor()
      >>> # not_dones should be an empty list.
      >>> results = [f.result() for f in dones]
    """
    if not futures:
        # In this case self.futures is always a list
        ftrs = self.futures
    elif type(futures) == list:
        ftrs = futures
    else:
        # Ensure futures is a list
        ftrs = [futures]

    if not ftrs:
        raise Exception('You must run pw.call_async(), pw.map()'
                        ' or pw.map_reduce() before call pw.get_result()')

    def _finished(future):
        return future.done if download_results else future.ready

    executor_running = self._state == ExecutorState.running

    rabbit_amqp_url = None
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # the status message is assumed to be built only while the executor is
    # running -- confirm against the original layout.
    if executor_running:
        if self.rabbitmq_monitor:
            rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
        if rabbit_amqp_url and not download_results:
            logger.info(
                'Going to use RabbitMQ to monitor function activations')
        if download_results:
            template = 'Executor ID {} Getting results...'
        else:
            template = 'Executor ID {} Waiting for functions to complete...'
        msg = template.format(self.executor_id)
        logger.info(msg)
        if not self.log_level:
            print(msg)

    if is_unix_system():
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

    pbar = None
    if not self.is_cf_cluster and executor_running \
            and not self.log_level and not is_notebook():
        import tqdm
        print()
        pbar = tqdm.tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                         total=len(ftrs),
                         disable=False)

    try:
        wait(ftrs, self.executor_id, self.internal_storage,
             download_results=download_results,
             throw_except=throw_except,
             return_when=return_when,
             rabbit_amqp_url=rabbit_amqp_url,
             pbar=pbar,
             THREADPOOL_SIZE=THREADPOOL_SIZE,
             WAIT_DUR_SEC=WAIT_DUR_SEC)

    except TimeoutError:
        pending_ids = [f.activation_id for f in ftrs if not _finished(f)]
        timeout_template = (
            'Executor ID {} Raised timeout of {} seconds waiting for results '
            '\nActivations not done: {}')
        msg = timeout_template.format(self.executor_id, timeout, pending_ids)
        self._state = ExecutorState.error

    except KeyboardInterrupt:
        pending_ids = [f.activation_id for f in ftrs if not _finished(f)]
        msg = 'Executor ID {} Cancelled \nActivations not done: {}'.format(
            self.executor_id, pending_ids)
        self._state = ExecutorState.error

    finally:
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
            print()
        if self._state == ExecutorState.error:
            logger.info(msg)
            if not self.log_level:
                print(msg)
        if self.data_cleaner and not self.is_cf_cluster \
                and self._state != ExecutorState.ready:
            self.clean()

    fs_dones = [f for f in ftrs if _finished(f)]
    fs_notdones = [f for f in ftrs if not _finished(f)]

    self._state = ExecutorState.ready

    return fs_dones, fs_notdones
def monitor(self, futures=None, throw_except=True, return_when=ALL_COMPLETED,
            download_results=False, timeout=EXECUTION_TIMEOUT,
            THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
    """
    Wait for the given futures to complete and report which ones finished.

    When `futures` is empty, the futures of every job in
    `JobState.running` are collected from `self.jobs` and those jobs are
    marked `JobState.ready`. On unix systems the timeout is armed through
    `SIGALRM`. A `FunctionException` is reported and ends the process via
    `sys.exit()`. A timeout or keyboard interrupt is reported and the method
    still returns the partition computed afterwards.

    :param futures: A future or a list of futures. Default None
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default False (Only download statuses)
    :param timeout: Timeout, in seconds, of waiting for results.
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_dones, fs_notdones)` where `fs_dones` is a list of futures
        that have completed and `fs_notdones` is a list of futures that have
        not completed.
    :rtype: 2-tuple of list
    :raises Exception: if there are no futures to monitor.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm the FunctionException handler and `finally` layout.
    if not futures:
        futures = []
        for job_key in self.jobs:
            job_info = self.jobs[job_key]
            if job_info['state'] == JobState.running:
                futures.extend(job_info['futures'])
                job_info['state'] = JobState.ready

    # A single future is accepted too: wrap it into a list
    ftrs = futures if type(futures) == list else [futures]

    if not ftrs:
        raise Exception('You must run call_async(), map() or map_reduce()'
                        ' before calling get_result() method')

    def _unfinished_activation_ids():
        if download_results:
            # Ready futures that produce no output are not reported as pending
            return [f.activation_id for f in ftrs
                    if not (f.done or (f.ready and not f.produce_output))]
        return [f.activation_id for f in ftrs if not f.ready]

    rabbit_amqp_url = None
    if self.rabbitmq_monitor:
        rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
    if rabbit_amqp_url and not download_results:
        logger.info('Going to use RabbitMQ to monitor function activations')
        logging.getLogger('pika').setLevel(logging.WARNING)

    if download_results:
        template = 'ExecutorID {} - Getting results...'
    else:
        template = 'ExecutorID {} - Waiting for functions to complete...'
    msg = template.format(self.executor_id)
    logger.info(msg)

    executor_running = self._state == ExecutorState.running
    if not self.log_level and executor_running:
        print(msg)

    if is_unix_system():
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

    pbar = None
    if not self.is_cf_cluster and executor_running and not self.log_level:
        from tqdm.auto import tqdm
        total = len(ftrs)
        if is_notebook():
            pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                        total=total)  # ncols=800
        else:
            print()
            pbar = tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                        total=total, disable=False)

    try:
        wait(ftrs, self.executor_id, self.internal_storage,
             download_results=download_results,
             throw_except=throw_except,
             return_when=return_when,
             rabbit_amqp_url=rabbit_amqp_url,
             pbar=pbar,
             THREADPOOL_SIZE=THREADPOOL_SIZE,
             WAIT_DUR_SEC=WAIT_DUR_SEC)

    except FunctionException as e:
        # Report the remote failure and terminate the process
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
        logger.info(e.msg)
        if not is_notebook():
            print()
        if not self.log_level:
            print(e.msg)
        if e.exc_msg:
            print('--> Exception: ' + e.exc_msg)
        else:
            print()
            traceback.print_exception(*e.exception)
        sys.exit()

    except TimeoutError:
        pending_ids = _unfinished_activation_ids()
        timeout_template = (
            'ExecutorID {} - Raised timeout of {} seconds waiting for results '
            '\nActivations not done: {}')
        msg = timeout_template.format(self.executor_id, timeout, pending_ids)
        self._state = ExecutorState.error

    except KeyboardInterrupt:
        pending_ids = _unfinished_activation_ids()
        msg = 'ExecutorID {} - Cancelled \nActivations not done: {}'.format(
            self.executor_id, pending_ids)
        self._state = ExecutorState.error

    finally:
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
            if not is_notebook():
                print()
        if self._state == ExecutorState.error:
            logger.info(msg)
            if not self.log_level:
                print(msg)
        if download_results and self.data_cleaner and not self.is_cf_cluster:
            self.clean()

    def _finished(future):
        return future.done if download_results else future.ready

    fs_dones = [f for f in ftrs if _finished(f)]
    fs_notdones = [f for f in ftrs if not _finished(f)]

    self._state = ExecutorState.ready

    return fs_dones, fs_notdones
def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED,
         download_results=False, timeout=None, THREADPOOL_SIZE=128,
         WAIT_DUR_SEC=1):
    """
    Wait for futures to complete and split them into done / not-done lists.

    Falls back to `self.futures` when `fs` is empty. Only the futures that
    are still unfinished on entry are actually waited for, but the returned
    partition always covers every requested future, so futures that were
    already complete are reported in `fs_done`. If nothing is pending the
    method returns immediately without printing or waiting.

    On unix systems, when `timeout` is not None, the timeout is armed
    through `SIGALRM`. A `KeyboardInterrupt` is reported and the method
    still returns the partition computed afterwards; any other exception is
    re-raised after cleanup.

    :param fs: A future or a list of futures. Default None (use self.futures)
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default False (Only get statuses)
    :param timeout: Timeout, in seconds, of waiting for results. Default None (no timeout)
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
        that have completed and `fs_notdone` is a list of futures that have
        not completed.
    :rtype: 2-tuple of list
    :raises Exception: if there are no futures to wait for.
    """
    requested = self.futures if not fs else fs
    if not isinstance(requested, list):
        requested = [requested]

    if not requested:
        raise Exception('You must run the call_async(), map() or map_reduce(), or provide'
                        ' a list of futures before calling the wait()/get_result() method')

    # Snapshot: `self.futures` may be trimmed in the `finally` clause below,
    # and the returned partition must not be affected by that.
    all_futures = list(requested)

    def _finished(future):
        # "done" means results downloaded; "ready" means status available
        if download_results:
            return future.done
        return future.ready or future.done

    def _partition():
        done = [f for f in all_futures if _finished(f)]
        not_done = [f for f in all_futures if not _finished(f)]
        return done, not_done

    if download_results:
        msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
    else:
        msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)

    # The completed futures must be collected from the full list *before*
    # narrowing down to the pending ones, otherwise they are always lost.
    fs_done, futures = _partition()
    if not futures:
        return fs_done, []

    if not self.log_level:
        print(msg)
    else:
        logger.info(msg)

    if is_unix_system() and timeout is not None:
        logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
        error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(timeout)
        signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
        signal.alarm(timeout)

    pbar = None
    error = False

    if not self.is_pywren_function and not self.log_level:
        from tqdm.auto import tqdm

        if is_notebook():
            pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                        total=len(futures))  # ncols=800
        else:
            print()
            pbar = tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                        total=len(futures), disable=False)

    try:
        if self.rabbitmq_monitor:
            logger.info('Using RabbitMQ to monitor function activations')
            wait_rabbitmq(futures, self.internal_storage,
                          rabbit_amqp_url=self.rabbit_amqp_url,
                          download_results=download_results,
                          throw_except=throw_except,
                          pbar=pbar,
                          return_when=return_when,
                          THREADPOOL_SIZE=THREADPOOL_SIZE)
        else:
            wait_storage(futures, self.internal_storage,
                         download_results=download_results,
                         throw_except=throw_except,
                         return_when=return_when,
                         pbar=pbar,
                         THREADPOOL_SIZE=THREADPOOL_SIZE,
                         WAIT_DUR_SEC=WAIT_DUR_SEC)

    except KeyboardInterrupt:
        not_done_count = sum(1 for f in futures if not _finished(f))
        msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
               .format(self.executor_id, not_done_count))
        if pbar:
            pbar.close()
            print()
        if not self.log_level:
            print(msg)
        else:
            logger.info(msg)
        error = True

    except Exception:
        error = True
        raise

    finally:
        self.invoker.stop()
        if is_unix_system():
            signal.alarm(0)
        # Skip the bar if the KeyboardInterrupt handler already closed it
        if pbar and not pbar.disable:
            pbar.close()
            if not is_notebook():
                print()
        if self.data_cleaner and not self.is_pywren_function:
            self.clean(cloudobjects=False, force=False, log=False)
        if not fs and error and is_notebook():
            # Drop the futures of the failed wait from the executor's list
            del self.futures[len(self.futures) - len(futures):]

    return _partition()