Beispiel #1
0
    def wait(self,
             fs=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             download_results=False,
             timeout=EXECUTION_TIMEOUT,
             THREADPOOL_SIZE=128,
             WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        if not fs:
            fs = []
            for job in self.jobs:
                if not download_results and self.jobs[job][
                        'state'] == JobState.Running:
                    fs.extend(self.jobs[job]['futures'])
                    self.jobs[job]['state'] = JobState.Ready
                elif download_results and self.jobs[job][
                        'state'] != JobState.Done:
                    fs.extend(self.jobs[job]['futures'])
                    self.jobs[job]['state'] = JobState.Done

        if type(fs) != list:
            futures = [fs]
        else:
            futures = fs

        if not futures:
            raise Exception(
                'You must run the call_async(), map() or map_reduce(), or provide'
                ' a list of futures before calling the monitor()/get_result() method'
            )

        if download_results:
            msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
        else:
            msg = 'ExecutorID {} - Waiting for functions to complete...'.format(
                self.executor_id)
        logger.info(msg)
        if not self.log_level and self._state == FunctionExecutor.State.Running:
            print(msg)

        if is_unix_system():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        pbar = None
        if not self.is_remote_cluster and self._state == FunctionExecutor.State.Running \
           and not self.log_level:
            from tqdm.auto import tqdm
            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                            total=len(futures))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                            total=len(futures),
                            disable=False)

        try:
            if self.rabbitmq_monitor:
                logger.info('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures,
                              self.internal_storage,
                              rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results,
                              throw_except=throw_except,
                              pbar=pbar,
                              return_when=return_when,
                              THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures,
                             self.internal_storage,
                             download_results=download_results,
                             throw_except=throw_except,
                             return_when=return_when,
                             pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE,
                             WAIT_DUR_SEC=WAIT_DUR_SEC)

        except FunctionException as e:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
            logger.info(e.msg)
            if not self.log_level:
                if not is_notebook():
                    print()
                print(e.msg)
            if e.exc_msg:
                logger.info('Exception: ' + e.exc_msg)
                if not self.log_level:
                    print('--> Exception: ' + e.exc_msg)
            else:
                print()
                traceback.print_exception(*e.exception)
            sys.exit()

        except TimeoutError:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.ready and not f.done]
            msg = (
                'ExecutorID {} - Raised timeout of {} seconds waiting for results - Total Activations not done: {}'
                .format(self.executor_id, timeout, len(not_dones_call_ids)))
            self._state = FunctionExecutor.State.Error

        except KeyboardInterrupt:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            self._state = FunctionExecutor.State.Error

        except Exception as e:
            if not self.is_remote_cluster:
                self.clean()
            raise e

        finally:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                if not is_notebook():
                    print()
            if self._state == FunctionExecutor.State.Error:
                logger.debug(msg)
                if not self.log_level:
                    print(msg)
            if download_results and self.data_cleaner and not self.is_remote_cluster:
                self.clean()

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
            self._state = FunctionExecutor.State.Done
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]
            self._state = FunctionExecutor.State.Ready

        return fs_done, fs_notdone
Beispiel #2
0
    def monitor(self,
                futures=None,
                throw_except=True,
                return_when=ALL_COMPLETED,
                download_results=False,
                timeout=wrenconfig.RUNTIME_TIMEOUT,
                THREADPOOL_SIZE=128,
                WAIT_DUR_SEC=1):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.
        :param futures: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only download statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of lists

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.monitor()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        if futures:
            # Ensure futures is a list
            if type(futures) != list:
                ftrs = [futures]
            else:
                ftrs = futures
        else:
            # In this case self.futures is always a list
            ftrs = self.futures

        if not ftrs:
            raise Exception('You must run pw.call_async(), pw.map()'
                            ' or pw.map_reduce() before call pw.get_result()')

        rabbit_amqp_url = None
        if self._state == ExecutorState.running:
            if self.rabbitmq_monitor:
                rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            if rabbit_amqp_url and not download_results:
                logger.info(
                    'Going to use RabbitMQ to monitor function activations')
            if download_results:
                msg = 'Executor ID {} Getting results...'.format(
                    self.executor_id)
            else:
                msg = 'Executor ID {} Waiting for functions to complete...'.format(
                    self.executor_id)
            logger.info(msg)
            if not self.log_level and self._state == ExecutorState.running:
                print(msg)

        if is_unix_system():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        pbar = None
        if not self.is_cf_cluster and self._state == ExecutorState.running \
           and not self.log_level and not is_notebook():
            import tqdm
            print()
            pbar = tqdm.tqdm(
                bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                total=len(ftrs),
                disable=False)

        try:
            wait(ftrs,
                 self.executor_id,
                 self.internal_storage,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 rabbit_amqp_url=rabbit_amqp_url,
                 pbar=pbar,
                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                 WAIT_DUR_SEC=WAIT_DUR_SEC)

        except TimeoutError:
            if download_results:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.done
                ]
            else:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.ready
                ]
            msg = (
                'Executor ID {} Raised timeout of {} seconds waiting for results '
                '\nActivations not done: {}'.format(self.executor_id, timeout,
                                                    not_dones_activation_ids))
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            if download_results:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.done
                ]
            else:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.ready
                ]
            msg = 'Executor ID {} Cancelled  \nActivations not done: {}'.format(
                self.executor_id, not_dones_activation_ids)
            self._state = ExecutorState.error

        finally:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                print()
            if self._state == ExecutorState.error:
                logger.info(msg)
                if not self.log_level:
                    print(msg)
            if self.data_cleaner and not self.is_cf_cluster and self._state != ExecutorState.ready:
                self.clean()

        if download_results:
            fs_dones = [f for f in ftrs if f.done]
            fs_notdones = [f for f in ftrs if not f.done]
        else:
            fs_dones = [f for f in ftrs if f.ready]
            fs_notdones = [f for f in ftrs if not f.ready]

        self._state = ExecutorState.ready

        return fs_dones, fs_notdones
Beispiel #3
0
    def monitor(self, futures=None, throw_except=True, return_when=ALL_COMPLETED,
                download_results=False, timeout=EXECUTION_TIMEOUT,
                THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.
        :param futures: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only download statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        if not futures:
            futures = []
            for job in self.jobs:
                if self.jobs[job]['state'] == JobState.running:
                    futures.extend(self.jobs[job]['futures'])
                    self.jobs[job]['state'] = JobState.ready

        if type(futures) != list:
            ftrs = [futures]
        else:
            ftrs = futures

        if not ftrs:
            raise Exception('You must run call_async(), map() or map_reduce()'
                            ' before calling get_result() method')

        rabbit_amqp_url = None
        if self.rabbitmq_monitor:
            rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
        if rabbit_amqp_url and not download_results:
            logger.info('Going to use RabbitMQ to monitor function activations')
            logging.getLogger('pika').setLevel(logging.WARNING)
        if download_results:
            msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
        else:
            msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)
        logger.info(msg)
        if not self.log_level and self._state == ExecutorState.running:
            print(msg)

        if is_unix_system():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        pbar = None
        if not self.is_cf_cluster and self._state == ExecutorState.running \
           and not self.log_level:
            from tqdm.auto import tqdm
            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}', total=len(ftrs))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ', total=len(ftrs), disable=False)

        try:
            wait(ftrs, self.executor_id, self.internal_storage, download_results=download_results,
                 throw_except=throw_except, return_when=return_when, rabbit_amqp_url=rabbit_amqp_url,
                 pbar=pbar, THREADPOOL_SIZE=THREADPOOL_SIZE, WAIT_DUR_SEC=WAIT_DUR_SEC)

        except FunctionException as e:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
            logger.info(e.msg)
            if not is_notebook():
                print()
            if not self.log_level:
                print(e.msg)
            if e.exc_msg:
                print('--> Exception: ' + e.exc_msg)
            else:
                print()
                traceback.print_exception(*e.exception)
            sys.exit()

        except TimeoutError:
            if download_results:
                not_dones_activation_ids = [f.activation_id for f in ftrs if not f.done and not (f.ready and not f.produce_output)]
            else:
                not_dones_activation_ids = [f.activation_id for f in ftrs if not f.ready]
            msg = ('ExecutorID {} - Raised timeout of {} seconds waiting for results '
                   '\nActivations not done: {}'.format(self.executor_id, timeout, not_dones_activation_ids))
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            if download_results:
                not_dones_activation_ids = [f.activation_id for f in ftrs if not f.done and not (f.ready and not f.produce_output)]
            else:
                not_dones_activation_ids = [f.activation_id for f in ftrs if not f.ready]
            msg = 'ExecutorID {} - Cancelled  \nActivations not done: {}'.format(self.executor_id, not_dones_activation_ids)
            self._state = ExecutorState.error

        finally:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                if not is_notebook():
                    print()
            if self._state == ExecutorState.error:
                logger.info(msg)
                if not self.log_level:
                    print(msg)
            if download_results and self.data_cleaner and not self.is_cf_cluster:
                self.clean()

        if download_results:
            fs_dones = [f for f in ftrs if f.done]
            fs_notdones = [f for f in ftrs if not f.done]
        else:
            fs_dones = [f for f in ftrs if f.ready]
            fs_notdones = [f for f in ftrs if not f.ready]

        self._state = ExecutorState.ready

        return fs_dones, fs_notdones
Beispiel #4
0
    def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED, download_results=False,
             timeout=None, THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = self.futures if not fs else fs
        if type(futures) != list:
            futures = [futures]

        if not futures:
            raise Exception('You must run the call_async(), map() or map_reduce(), or provide'
                            ' a list of futures before calling the wait()/get_result() method')

        if download_results:
            msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
            futures = [f for f in futures if not f.done]
            fs_done = [f for f in futures if f.done]
        else:
            msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)
            futures = [f for f in futures if not f.ready and not f.done]
            fs_done = [f for f in futures if f.ready or f.done]

        if not futures:
            return fs_done, []

        print(msg) if not self.log_level else logger.info(msg)

        if is_unix_system() and timeout is not None:
            logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
            error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(timeout)
            signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
            signal.alarm(timeout)

        pbar = None
        error = False
        if not self.is_pywren_function and not self.log_level:
            from tqdm.auto import tqdm

            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}', total=len(futures))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ', total=len(futures), disable=False)

        try:
            if self.rabbitmq_monitor:
                logger.info('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures, self.internal_storage, rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results, throw_except=throw_except,
                              pbar=pbar, return_when=return_when, THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures, self.internal_storage, download_results=download_results,
                             throw_except=throw_except, return_when=return_when, pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE, WAIT_DUR_SEC=WAIT_DUR_SEC)

        except KeyboardInterrupt:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            if pbar:
                pbar.close()
                print()
            print(msg) if not self.log_level else logger.info(msg)
            error = True

        except Exception as e:
            error = True
            raise e

        finally:
            self.invoker.stop()
            if is_unix_system():
                signal.alarm(0)
            if pbar and not pbar.disable:
                pbar.close()
                if not is_notebook():
                    print()
            if self.data_cleaner and not self.is_pywren_function:
                self.clean(cloudobjects=False, force=False, log=False)
            if not fs and error and is_notebook():
                del self.futures[len(self.futures)-len(futures):]

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]

        return fs_done, fs_notdone