Beispiel #1
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 log_level=None,
                 runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
        """
        Build an executor backed by IBM Cloud Functions.

        :param config: Settings passed in here will override those in
            `pywren_config`. Default None.
        :param runtime: Runtime (action) name to use; when given it replaces
            `config['ibm_cf']['action_name']`. Default None.
        :param log_level: When given, it is handed to
            `wrenlogging.default_config`. Default None.
        :param runtime_timeout: Forwarded to the underlying `Executor`.
            Defaults to `wrenconfig.CF_RUNTIME_TIMEOUT`.
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        # Start from the default configuration, layering user settings on top
        self.config = (wrenconfig.default() if config is None
                       else wrenconfig.default(config))

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        cf_settings = self.config['ibm_cf']
        self.runtime = cf_settings['action_name']
        self.cf_cluster = cf_settings['is_cf_cluster']

        pywren_settings = self.config['pywren']
        self.data_cleaner = pywren_settings['data_cleaner']

        # Retry options the invoker needs, copied from the 'pywren' section
        retry_config = {
            option: pywren_settings[option]
            for option in ('invocation_retry', 'retry_sleeps', 'retries')
        }

        invoker = invokers.IBMCloudFunctionsInvoker(cf_settings, retry_config)

        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(self.storage_config)
        self.executor = Executor(invoker,
                                 self.config,
                                 self.internal_storage,
                                 runtime_timeout)
        self.executor_id = self.executor.executor_id

        # Nothing has been submitted yet
        self.futures = []
        self.reduce_future = None
Beispiel #2
0
    def __init__(self, config=None, runtime=None, log_level=None, job_max_runtime=JOB_MAX_RUNTIME):
        """
        Build an executor backed by IBM Cloud Functions.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime (action) name to use; when given it replaces
            `config['ibm_cf']['action_name']`. Default None.
        :param log_level: When given, it is handed to `wrenlogging.default_config`. Default None.
        :param job_max_runtime: Forwarded to the underlying `Executor`.
            Defaults to `JOB_MAX_RUNTIME`.
        :return `executor` object.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        # Start from the default configuration, layering user settings on top
        self.config = wrenconfig.default() if config is None else wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        # The presence of any __OW_* environment variable marks an
        # OpenWhisk execution, which gets its own logging setup.
        self._openwhisk = any(name.startswith('__OW_') for name in os.environ)
        if self._openwhisk:
            wrenlogging.ow_config(logging.INFO)

        cf_settings = self.config['ibm_cf']
        self.runtime = cf_settings['action_name']

        invoker = invokers.IBMCloudFunctionsInvoker(cf_settings)
        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.storage_handler = storage.Storage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.storage_handler, job_max_runtime)
        self.executor_id = self.executor.executor_id

        # Nothing has been submitted yet
        self.futures = None
        self.reduce_future = None

        # Announce the new executor; echo on stdout when the logger sits at WARNING
        created_msg = 'IBM Cloud Functions executor created with ID {}'.format(self.executor_id)
        logger.info(created_msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(created_msg)
Beispiel #3
0
class ibm_cf_executor(object):
    """
    Single-use executor that runs functions through IBM Cloud Functions.

    Exactly one of `call_async()`, `map()` or `map_reduce()` can be run on an
    instance (each of them requires the `ExecutorState.new` state); the
    outcome is then collected with `wait()` or `get_result()`.
    """

    # Number of inputs handed to each nested executor by map(remote_invocation=True)
    _REMOTE_CHUNK_SIZE = 100
    # Threads used by map() to collect the futures produced by nested executors
    _REMOTE_FETCH_THREADS = 32

    def __init__(self, config=None, runtime=None, log_level=None, job_max_runtime=JOB_MAX_RUNTIME):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param log_level: When given, it is handed to `wrenlogging.default_config`. Default None.
        :param job_max_runtime: Forwarded to the underlying `Executor`.
            Defaults to `JOB_MAX_RUNTIME`.
        :return `executor` object.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        # The presence of any __OW_* environment variable marks an
        # OpenWhisk execution, which gets its own logging setup.
        self._openwhisk = any(name.startswith('__OW_') for name in os.environ)
        if self._openwhisk:
            wrenlogging.ow_config(logging.INFO)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']

        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config)
        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.storage_handler = storage.Storage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.storage_handler, job_max_runtime)
        self.executor_id = self.executor.executor_id

        self.futures = None
        self.reduce_future = None

        self._notify('IBM Cloud Functions executor created with ID {}'.format(self.executor_id))

    @staticmethod
    def _notify(msg):
        """
        Log `msg` at INFO level; when the logger's effective level is exactly
        WARNING, also print it so the message is still shown on stdout.
        """
        logger.info(msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(msg)

    @staticmethod
    def _timeout_handler(signum, frame):
        """SIGALRM handler: abort the pending wait by raising `TimeoutError`."""
        raise TimeoutError()

    @staticmethod
    def _close_progress_bar(pbar):
        """Close `pbar` and print the blank line that follows it; no-op when `pbar` is None."""
        if pbar is not None:
            pbar.close()
            print()

    def call_async(self, func, data, extra_env=None, extra_meta=None):
        """
        For run one function execution
        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :return: the future of the invocation
        :raises Exception: if the executor is not in the `new` state.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.call_async() in the current state,'
                            ' create a new pywren.ibm_cf_executor() instance.')
        self._state = ExecutorState.single_call
        # The underlying executor returns a sequence; keep only its first future
        self.futures = self.executor.call_async(func, data, extra_env, extra_meta)[0]

        return self.futures

    def map(self, func, iterdata, extra_env=None, extra_meta=None,
            remote_invocation=False, invoke_pool_threads=10, data_all_as_one=True,
            overwrite_invoke_args=None, exclude_modules=None):
        """
        :param func: the function to map over the data
        :param iterdata: An iterable of input data
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :param remote_invocation: When True and there is more than one input,
            the inputs are split into chunks of 100 and each chunk is mapped
            by a nested executor created inside a function. Default False.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: A list with size `len(iterdata)` of futures for each job, or
            the bare future when there is exactly one.
        :rtype: list of futures.
        :raises Exception: if the executor is not in the `new` state.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        def remote_invoker(input_data):
            # Runs inside a function: maps one chunk with a nested executor
            pw = pywren.ibm_cf_executor()
            return pw.map(func, input_data)

        if not isinstance(iterdata, list):
            iterdata = list(iterdata)

        if len(iterdata) > 1 and remote_invocation:
            chunk = self._REMOTE_CHUNK_SIZE
            map_func = remote_invoker
            # Each chunk is wrapped in a list so it reaches remote_invoker
            # as its single `input_data` argument.
            map_iterdata = [[iterdata[x:x + chunk]] for x in range(0, len(iterdata), chunk)]
            invoke_pool_threads = 1
        else:
            remote_invocation = False
            map_func = func
            map_iterdata = iterdata

        self.futures = self.executor.map(func=map_func, iterdata=map_iterdata,
                                         extra_env=extra_env, extra_meta=extra_meta,
                                         invoke_pool_threads=invoke_pool_threads,
                                         data_all_as_one=data_all_as_one,
                                         overwrite_invoke_args=overwrite_invoke_args,
                                         exclude_modules=exclude_modules,
                                         original_func_name=func.__name__)

        if remote_invocation:
            self._notify('Executor ID {} Getting remote invocations'.format(self.executor_id))

            def fetch_future_results(f):
                f.result(storage_handler=self.storage_handler)
                return f

            pool = ThreadPool(self._REMOTE_FETCH_THREADS)
            try:
                pool.map(fetch_future_results, self.futures)
            finally:
                # Always release the worker threads, even if a fetch raised
                pool.close()
                pool.join()

            remote_results = [f.result() for f in self.futures if f.done]
            self.futures = []
            for remote_futures in remote_results:
                # A one-element chunk makes the nested map() return a bare
                # future instead of a list, so both shapes are accepted.
                if isinstance(remote_futures, list):
                    self.futures.extend(remote_futures)
                else:
                    self.futures.append(remote_futures)

        self._state = ExecutorState.map

        # A single future is unwrapped and handled like a call_async() result
        if isinstance(self.futures, list) and len(self.futures) == 1:
            self.futures = self.futures[0]
            self._state = ExecutorState.single_call

        return self.futures

    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   chunk_size=64*1024**2, reducer_one_per_object=False,
                   reducer_wait_local=True, throw_except=True,
                   extra_env=None, extra_meta=None):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.
        :param map_function: the function to map over the data
        :param map_iterdata: the input data the map_function is mapped over
        :param reduce_function:  the function to reduce over the futures
        :param chunk_size: the size of the data chunks
        :param reducer_one_per_object: forwarded unchanged to the underlying executor.
        :param reducer_wait_local: forwarded unchanged to the underlying executor.
        :param throw_except: forwarded unchanged to the underlying executor.
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :return: The futures returned by the underlying executor; a
            one-element list is unwrapped to the bare future.
        :raises Exception: if the executor is not in the `new` state.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, data_list, bar)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.map_reduce() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        self.futures = self.executor.map_reduce(map_function, map_iterdata,
                                                reduce_function, chunk_size,
                                                reducer_one_per_object,
                                                reducer_wait_local,
                                                throw_except, extra_env, extra_meta)

        self._state = ExecutorState.map_reduce

        # Anything that ends up as a single future is handled like call_async()
        if isinstance(self.futures, list) and len(self.futures) == 1:
            self.futures = self.futures[0]
            self._state = ExecutorState.single_call
        elif not isinstance(self.futures, list):
            self._state = ExecutorState.single_call

        return self.futures

    def wait(self, throw_except=True, verbose=True, return_when=ALL_COMPLETED,
             THREADPOOL_SIZE=64, WAIT_DUR_SEC=4):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.

        :param throw_except: forwarded unchanged to the module-level `wait`.
        :param verbose: forwarded unchanged to the module-level `wait`.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_dones, fs_notdones)`
            where `fs_dones` is a list of futures that have completed
            and `fs_notdones` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if neither `map()` nor `map_reduce()` left the
            executor in its corresponding state.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.wait()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        # The state must be one of the two; the previous `not A or not B`
        # test was true for every state and made wait() always raise.
        if self._state not in (ExecutorState.map, ExecutorState.map_reduce):
            raise Exception('You must run pw.map() or pw.map_reduce() before call pw.wait()')

        return wait(self.futures, self.executor_id, self.storage_handler,
                    throw_except, verbose, return_when, THREADPOOL_SIZE, WAIT_DUR_SEC)

    def get_result(self, throw_except=True, verbose=False, timeout=JOB_MAX_RUNTIME):
        """
        For get PyWren results
        :param throw_except: Reraise exception if call raised. Default true.
        :param verbose: Shows some information prints.
        :param timeout: Seconds given to `signal.alarm` while waiting.
            Defaults to `JOB_MAX_RUNTIME`.
        :return: The result of the future/s

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.call_async(foo, data)
          >>> result = pw.get_result()
        """
        if self._state == ExecutorState.single_call:
            return self._get_result(throw_except=throw_except, verbose=verbose, timeout=timeout)
        return self._get_all_results(throw_except=throw_except, verbose=verbose, timeout=timeout)

    def _get_result(self, throw_except=True, verbose=False, timeout=JOB_MAX_RUNTIME):
        """
        For get one function execution (future) result.

        Uses `signal.alarm`, so it needs a platform that provides SIGALRM and
        must be called from the main thread. Stored data is always cleaned
        afterwards through `_clean()`.

        :param throw_except: Reraise exception if call raised. Default true.
        :param verbose: Shows some information prints; a progress bar is
            shown instead when False.
        :param timeout: Seconds given to `signal.alarm`; the alarm is re-armed
            after every fetch attempt.
        :return: The result of the call_async future, or None on timeout.
        :raises Exception: if nothing has been submitted yet.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.call_async(foo, data)
          >>> result = pw._get_result()
        """
        if self._state == ExecutorState.new:
            raise Exception('You must run pw.call_async(), or pw.map() '
                            'or pw.map_reduce() before call pw.get_result()')

        self._notify('Executor ID {} Getting result'.format(self.executor_id))

        signal.signal(signal.SIGALRM, self._timeout_handler)
        signal.alarm(timeout)

        # Bound up front so every except/finally branch can reference them
        result = None
        pbar = None

        try:
            if not verbose:
                import tqdm
                print()
                pbar = tqdm.tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                                 total=1, disable=False)

            # Fetch at least once so an already finished future still
            # yields its value, then keep polling until it is done.
            while True:
                result = self.futures.result(storage_handler=self.storage_handler,
                                             throw_except=throw_except,
                                             verbose=verbose)
                signal.alarm(timeout)
                if self.futures.done:
                    break

            if pbar is not None:
                pbar.update(1)
            self._close_progress_bar(pbar)

            self._state = ExecutorState.success

        except (TimeoutError, IndexError):
            self._close_progress_bar(pbar)
            msg = ('Executor ID {} Raised timeout of {} seconds getting the '
                   'result from Activation ID {}'.format(self.executor_id, timeout,
                                                         self.futures.activation_id))
            self._notify(msg)
            self._state = ExecutorState.error
            result = None

        except KeyboardInterrupt:
            self._close_progress_bar(pbar)
            self._notify('Executor ID {} Cancelled'.format(self.executor_id))
            sys.exit()

        finally:
            signal.alarm(0)
            if pbar is not None:
                pbar.close()
            self._clean()
            print()

        return result

    def _get_all_results(self, throw_except=True, verbose=False,
                         timeout=JOB_MAX_RUNTIME, THREADPOOL_SIZE=64,
                         WAIT_DUR_SEC=3):
        """
        Take in a list of futures, call result on each one individually
        by using a threadpool, and return those results. Useful to fetch
        the results as they are produced.

        Uses `signal.alarm`, so it needs a platform that provides SIGALRM and
        must be called from the main thread. Stored data is always cleaned
        afterwards through `_clean()`.

        :param throw_except: Reraise exception if call raised. Default True.
        :param verbose: Show results (True) or progress bar (False). Default False.
        :param timeout: Seconds given to `signal.alarm` for the whole collection.
        :param THREADPOOL_SIZE: Number of threads used to fetch results. Default 64
        :param WAIT_DUR_SEC: Longest pause between two status checks; the
            pause shrinks as more calls complete. Default 3
        :return: A list of the results of each future that is done
        :rtype: list
        :raises Exception: if nothing has been submitted yet.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> results = pw._get_all_results()
        """
        if self._state == ExecutorState.new:
            raise Exception('You must run pw.map() or pw.map_reduce() '
                            'before call pw.get_all_results()')

        self._notify('Executor ID {} Getting results'.format(self.executor_id))

        signal.signal(signal.SIGALRM, self._timeout_handler)
        signal.alarm(timeout)

        # Bound up front so every except/finally branch can reference them
        pool = None
        pbar = None

        try:
            pool = ThreadPool(THREADPOOL_SIZE)

            def fetch_future_results(f):
                f.result(storage_handler=self.storage_handler,
                         throw_except=throw_except, verbose=verbose)
                return f

            total = len(self.futures)
            if not verbose:
                import tqdm
                print()
                pbar = tqdm.tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                                 total=total, disable=False)

            # (callgroup_id, call_id) identifies a call; the set of our own
            # calls does not change while polling, so it is built once.
            own_call_ids = {(f.callgroup_id, f.call_id) for f in self.futures}
            done_call_ids = set()

            while len(done_call_ids) < total:
                # Poll more often as the share of finished calls grows
                pause = WAIT_DUR_SEC - ((len(done_call_ids) / total) * WAIT_DUR_SEC)
                time.sleep(pause)

                reported_call_ids = set(self.storage_handler.get_callset_status(self.executor_id))
                pending_call_ids = (reported_call_ids & own_call_ids) - done_call_ids
                pending_futures = [f for f in self.futures
                                   if (f.callgroup_id, f.call_id) in pending_call_ids]

                if pending_futures:
                    fetched = pool.map(fetch_future_results, pending_futures)
                    if pbar is not None:
                        for f in fetched:
                            if f.done:
                                pbar.update(1)
                        pbar.refresh()

                done_call_ids.update((f.callgroup_id, f.call_id)
                                     for f in pending_futures if f.done)

            self._close_progress_bar(pbar)
            self._state = ExecutorState.success

        except (TimeoutError, IndexError):
            self._close_progress_bar(pbar)
            not_dones_activation_ids = set([f.activation_id for f in self.futures if not f.done])
            msg = 'Executor ID {} Raised timeout of {} seconds getting results \nActivations not done: {}'.format(self.executor_id, timeout, not_dones_activation_ids)
            self._notify(msg)
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            self._close_progress_bar(pbar)
            not_dones_activation_ids = [f.activation_id for f in self.futures if not f.done]
            msg = 'Executor ID {} Cancelled  \nActivations not done: {}'.format(self.executor_id, not_dones_activation_ids)
            self._notify(msg)
            sys.exit()

        finally:
            if pbar is not None:
                pbar.close()
            signal.alarm(0)
            if pool is not None:
                # Stop accepting work on every exit path, not only on success
                pool.close()
            self._clean()
            print()

        results = [f.result(throw_except=throw_except) for f in self.futures if f.done]

        return results

    def _clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: When True the bucket is cleaned by calling
            `clean_bucket` in this process; otherwise `clean_os_bucket` is
            submitted through the executor with stdout silenced.
        """
        storage_bucket = self.storage_config['storage_bucket']
        storage_prefix = self.storage_config['storage_prefix']
        storage_prefix = os.path.join(storage_prefix, self.executor_id)

        self._notify("Executor ID {} Cleaning partial results from PyWren bucket '{}'".format(self.executor_id, storage_bucket))

        if local_execution:
            clean_bucket(storage_bucket, storage_prefix, self.storage_config)
        else:
            from contextlib import redirect_stdout

            extra_env = {'NOT_STORE_RESULTS': 'True'}
            # Silence the invocation output; the context managers close the
            # devnull handle and restore the caller's stdout even on error.
            with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
                self.executor.call_async(clean_os_bucket, [storage_bucket, storage_prefix], extra_env=extra_env)

        self._state = ExecutorState.closed
        self._notify("Executor ID {} Finished".format(self.executor_id))
Beispiel #4
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 runtime_memory=None,
                 log_level=None,
                 rabbitmq_monitor=False):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_memory: memory to use in the runtime; converted with `int()`
        :param log_level: log level to use during the execution. Either a
            level name or a numeric `logging` level, which is converted to
            its name. When omitted, the module logger's effective level is
            used unless that level is WARNING.
        :param rabbitmq_monitor: use rabbitmq as monitoring system; only
            honoured when `config['rabbitmq']['amqp_url']` is set
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self.start_time = time.time()
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        self.is_cf_cluster = is_cf_cluster()
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Overwrite runtime variables
        if runtime:
            self.config['pywren']['runtime'] = runtime
        if runtime_memory:
            self.config['pywren']['runtime_memory'] = int(runtime_memory)

        # Log level Configuration
        self.log_level = log_level
        if not self.log_level:
            if logger.getEffectiveLevel() != logging.WARNING:
                self.log_level = logging.getLevelName(
                    logger.getEffectiveLevel())
        elif isinstance(self.log_level, int):
            # Environment values must be strings: turn a numeric level such
            # as logging.DEBUG into its name instead of failing below.
            self.log_level = logging.getLevelName(self.log_level)
        if self.log_level:
            os.environ["PYWREN_LOG_LEVEL"] = self.log_level
            if not self.is_cf_cluster:
                wrenlogging.default_config(self.log_level)

        # RabbitMQ monitor configuration
        self.rabbitmq_monitor = rabbitmq_monitor
        if self.rabbitmq_monitor:
            if self.config['rabbitmq']['amqp_url']:
                os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
            else:
                # Monitoring was requested but no broker URL is configured
                self.rabbitmq_monitor = False
        else:
            # Monitoring not requested: drop the URL from the configuration
            self.config['rabbitmq']['amqp_url'] = None

        storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(storage_config)

        invoker = invokers.IBMCloudFunctionsInvoker(self.config)
        self.executor = Executor(invoker, self.config, self.internal_storage)
        self.executor_id = self.executor.executor_id

        # Nothing has been submitted yet
        self.futures = []
Beispiel #5
0
class ibm_cf_executor:
    def __init__(self,
                 config=None,
                 runtime=None,
                 runtime_memory=None,
                 log_level=None,
                 rabbitmq_monitor=False):
        """
        Build an executor and everything it needs to invoke functions.

        :param config: Settings handed to `wrenconfig.default()`. When None,
            `wrenconfig.default()` is called without arguments.
        :param runtime: When truthy, stored as config['pywren']['runtime'].
        :param runtime_memory: When truthy, stored (converted to int) as
            config['pywren']['runtime_memory'].
        :param log_level: Log level to use. When falsy, the effective level of
            the module logger is used if it differs from WARNING.
        :param rabbitmq_monitor: Ask for RabbitMQ monitoring. It only stays
            enabled when config['rabbitmq']['amqp_url'] is set.
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self.start_time = time.time()
        self._state = ExecutorState.new

        self.config = (wrenconfig.default() if config is None
                       else wrenconfig.default(config))

        self.is_cf_cluster = is_cf_cluster()
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Explicit arguments take precedence over the loaded configuration
        pywren_section = self.config['pywren']
        if runtime:
            pywren_section['runtime'] = runtime
        if runtime_memory:
            pywren_section['runtime_memory'] = int(runtime_memory)

        # Without an explicit level, inherit a non-default logger level
        self.log_level = log_level
        if not self.log_level:
            current_level = logger.getEffectiveLevel()
            if current_level != logging.WARNING:
                self.log_level = logging.getLevelName(current_level)
        if self.log_level:
            os.environ["PYWREN_LOG_LEVEL"] = self.log_level
            if not self.is_cf_cluster:
                wrenlogging.default_config(self.log_level)

        # RabbitMQ monitoring needs an AMQP URL; otherwise it is switched off
        self.rabbitmq_monitor = rabbitmq_monitor
        if not self.rabbitmq_monitor:
            self.config['rabbitmq']['amqp_url'] = None
        elif self.config['rabbitmq']['amqp_url']:
            os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
        else:
            self.rabbitmq_monitor = False

        self.internal_storage = storage.InternalStorage(
            wrenconfig.extract_storage_config(self.config))

        self.executor = Executor(invokers.IBMCloudFunctionsInvoker(self.config),
                                 self.config,
                                 self.internal_storage)
        self.executor_id = self.executor.executor_id

        self.futures = []

    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   extra_meta=None,
                   timeout=wrenconfig.RUNTIME_TIMEOUT):
        """
        Run a single function execution.

        :param func: the function to execute
        :param data: input data
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param timeout: Forwarded to the underlying executor's call_async().
        :return: The future of the invocation.
        :raises Exception: if the executor is already in the finished state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state == ExecutorState.finished:
            raise Exception(
                'You cannot run pw.call_async() in the current state,'
                ' create a new pywren.ibm_cf_executor() instance.')

        # The underlying executor returns a sequence; only its first item is used
        invocation = self.executor.call_async(func, data, extra_env,
                                              extra_meta, timeout)
        single_future = invocation[0]

        self.futures.append(single_future)
        self._state = ExecutorState.running
        return single_future

    def map(self,
            map_function,
            map_iterdata,
            extra_env=None,
            extra_meta=None,
            chunk_size=None,
            remote_invocation=False,
            timeout=wrenconfig.RUNTIME_TIMEOUT,
            remote_invocation_groups=None,
            invoke_pool_threads=500,
            data_all_as_one=True,
            overwrite_invoke_args=None,
            exclude_modules=None):
        """
        Map a function over the input data.

        :param map_function: the function to map over the data
        :param map_iterdata: A sized collection of input data (len() is called on it)
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param chunk_size: the size of the data chunks. 'None' for processing the whole file in one map
        :param remote_invocation: Use the remote-invocation action. Forced to False
            when there is a single input item or when running inside a CF cluster.
        :param timeout: Passed to the underlying executor as `job_max_runtime`.
        :param remote_invocation_groups: Forwarded to the underlying executor.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: The list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is already in the finished state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state == ExecutorState.finished:
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        inv_action_name = self.executor.invoker.action_name
        if len(map_iterdata) == 1 or self.is_cf_cluster:
            # Ensure no remote invocation in these particular cases
            remote_invocation = False

        try:
            if remote_invocation:
                ria_memory = wrenconfig.RUNTIME_RI_MEMORY_DEFAULT
                self.executor.invoker.action_name = create_ri_action_name(
                    inv_action_name, ria_memory)

            map_futures, unused_ppo = self.executor.map(
                map_function=map_function,
                iterdata=map_iterdata,
                obj_chunk_size=chunk_size,
                extra_env=extra_env,
                extra_meta=extra_meta,
                remote_invocation=remote_invocation,
                remote_invocation_groups=remote_invocation_groups,
                invoke_pool_threads=invoke_pool_threads,
                data_all_as_one=data_all_as_one,
                overwrite_invoke_args=overwrite_invoke_args,
                exclude_modules=exclude_modules,
                job_max_runtime=timeout)
        finally:
            # Always put the original action name back, even when the
            # invocation fails, so later calls do not silently keep using
            # the remote-invocation action.
            self.executor.invoker.action_name = inv_action_name

        self.futures.extend(map_futures)
        self._state = ExecutorState.running

        if len(map_futures) == 1:
            return map_futures[0]
        return map_futures

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   extra_env=None,
                   extra_meta=None,
                   chunk_size=None,
                   remote_invocation=False,
                   remote_invocation_groups=None,
                   timeout=wrenconfig.RUNTIME_TIMEOUT,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   invoke_pool_threads=500,
                   data_all_as_one=True,
                   overwrite_invoke_args=None,
                   exclude_modules=None):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata: A sized collection of input data (len() is called on it)
        :param reduce_function: the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param chunk_size: the size of the data chunks. 'None' for processing the whole file in one map
        :param remote_invocation: Use the remote-invocation action for the map phase.
            Forced to False when there is a single input item or when running inside a CF cluster.
        :param remote_invocation_groups: Forwarded to the underlying executor.
        :param timeout: Passed to the underlying executor as `job_max_runtime`.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for the map futures locally (through monitor()) before reducing
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: The list of reduce futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is already in the finished state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, map_data_list, bar)
        """

        if self._state == ExecutorState.finished:
            raise Exception(
                'You cannot run pw.map_reduce() in the current state.'
                ' Create a new pywren.ibm_cf_executor() instance.')

        inv_action_name = self.executor.invoker.action_name

        if len(map_iterdata) == 1 or self.is_cf_cluster:
            # Ensure no remote invocation in these particular cases
            remote_invocation = False

        try:
            if remote_invocation:
                ria_memory = wrenconfig.RUNTIME_RI_MEMORY_DEFAULT
                self.executor.invoker.action_name = create_ri_action_name(
                    inv_action_name, ria_memory)

            map_futures, parts_per_object = self.executor.map(
                map_function,
                map_iterdata,
                extra_env=extra_env,
                extra_meta=extra_meta,
                obj_chunk_size=chunk_size,
                remote_invocation=remote_invocation,
                remote_invocation_groups=remote_invocation_groups,
                invoke_pool_threads=invoke_pool_threads,
                data_all_as_one=data_all_as_one,
                overwrite_invoke_args=overwrite_invoke_args,
                exclude_modules=exclude_modules,
                job_max_runtime=timeout)

            self._state = ExecutorState.running
            if reducer_wait_local:
                self.monitor(futures=map_futures)
        finally:
            # Restore the original action name before the reduce phase and
            # also when the map phase or the local wait fails, so the invoker
            # is never left pointing at the remote-invocation action.
            self.executor.invoker.action_name = inv_action_name

        futures = self.executor.reduce(reduce_function, map_futures,
                                       parts_per_object,
                                       reducer_one_per_object, extra_env,
                                       extra_meta)
        # Only the reduce futures are tracked by this executor
        self.futures.extend(futures)

        if len(futures) == 1:
            return futures[0]
        return futures

    def monitor(self,
                futures=None,
                throw_except=True,
                return_when=ALL_COMPLETED,
                download_results=False,
                timeout=wrenconfig.RUNTIME_TIMEOUT,
                THREADPOOL_SIZE=128,
                WAIT_DUR_SEC=1):
        """
        Wait for futures to complete and split them into completed and
        uncompleted ones.

        A TimeoutError or KeyboardInterrupt raised while waiting is caught
        here: the activations still pending are logged and the method returns
        normally with whatever completed.

        :param futures: A future or a list of futures. Default None (use the
            futures tracked by this executor).
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default False (only download statuses).
            When True completion is judged by `done`, otherwise by `ready`.
        :param timeout: Timeout of waiting for results (armed through SIGALRM on unix systems).
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if there are no futures to wait for.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.monitor()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        # Normalise the input: self.futures is always a list, a lone future
        # gets wrapped into one
        if not futures:
            ftrs = self.futures
        elif type(futures) != list:
            ftrs = [futures]
        else:
            ftrs = futures

        if not ftrs:
            raise Exception('You must run pw.call_async(), pw.map()'
                            ' or pw.map_reduce() before call pw.get_result()')

        # Attribute of a future that tells whether it counts as completed
        completion_attr = 'done' if download_results else 'ready'

        def pending_activation_ids():
            return [f.activation_id for f in ftrs
                    if not getattr(f, completion_attr)]

        rabbit_amqp_url = None
        if self._state == ExecutorState.running:
            if self.rabbitmq_monitor:
                rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            if rabbit_amqp_url and not download_results:
                logger.info(
                    'Going to use RabbitMQ to monitor function activations')

            if download_results:
                template = 'Executor ID {} Getting results...'
            else:
                template = 'Executor ID {} Waiting for functions to complete...'
            msg = template.format(self.executor_id)
            logger.info(msg)
            if not self.log_level:
                print(msg)

        if is_unix_system():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        # Progress bar only for a running, non-logging, non-notebook session
        # outside a CF cluster
        pbar = None
        if not self.is_cf_cluster and self._state == ExecutorState.running \
           and not self.log_level and not is_notebook():
            import tqdm
            print()
            pbar = tqdm.tqdm(
                bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                total=len(ftrs),
                disable=False)

        try:
            wait(ftrs,
                 self.executor_id,
                 self.internal_storage,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 rabbit_amqp_url=rabbit_amqp_url,
                 pbar=pbar,
                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                 WAIT_DUR_SEC=WAIT_DUR_SEC)

        except TimeoutError:
            not_dones_activation_ids = pending_activation_ids()
            msg = (
                'Executor ID {} Raised timeout of {} seconds waiting for results '
                '\nActivations not done: {}'.format(self.executor_id, timeout,
                                                    not_dones_activation_ids))
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            not_dones_activation_ids = pending_activation_ids()
            msg = 'Executor ID {} Cancelled  \nActivations not done: {}'.format(
                self.executor_id, not_dones_activation_ids)
            self._state = ExecutorState.error

        finally:
            # Disarm the alarm and tidy up the console before reporting
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                print()
            if self._state == ExecutorState.error:
                logger.info(msg)
                if not self.log_level:
                    print(msg)
            if self.data_cleaner and not self.is_cf_cluster and self._state != ExecutorState.ready:
                self.clean()

        fs_dones = [f for f in ftrs if getattr(f, completion_attr)]
        fs_notdones = [f for f in ftrs if not getattr(f, completion_attr)]

        self._state = ExecutorState.ready

        return fs_dones, fs_notdones

    def get_result(self,
                   futures=None,
                   throw_except=True,
                   timeout=wrenconfig.RUNTIME_TIMEOUT,
                   THREADPOOL_SIZE=64,
                   WAIT_DUR_SEC=1):
        """
        Wait for the futures (through monitor() with result download enabled)
        and collect their results.

        :param futures: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: The list of results, or the result itself when there is exactly one.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> results = pw.get_result()
        """
        completed, _pending = self.monitor(
            futures=futures,
            throw_except=throw_except,
            timeout=timeout,
            download_results=True,
            THREADPOOL_SIZE=THREADPOOL_SIZE,
            WAIT_DUR_SEC=WAIT_DUR_SEC)

        # Skip futures whose `futures` attribute is truthy; only the others
        # contribute a result
        result = []
        for ftr in completed:
            if ftr.done and not ftr.futures:
                result.append(ftr.result())

        msg = "Executor ID {} Finished getting results".format(
            self.executor_id)
        logger.info(msg)
        if not self.log_level:
            print(msg)

        return result[0] if len(result) == 1 else result

    def create_timeline_plots(self, dst_dir, dst_file_name, futures=None):
        """
        Creates timeline and histogram plots of the current execution.

        :param dst_dir: destination folder, handed to the plot helpers.
        :param dst_file_name: name of the file, handed to the plot helpers.
        :param futures: Futures to plot. Default None (use the futures
            tracked by this executor).
        :raises Exception: if there are no futures to plot or the executor is
            still in its initial state.
        """
        if futures:
            ftrs = futures
        else:
            ftrs = self.futures

        if not ftrs or self._state == ExecutorState.new:
            raise Exception(
                'You must run pw.call_async(), pw.map() or pw.map_reduce()'
                ' before call pw.create_timeline_plots()')

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from pywren_ibm_cloud.plots import create_timeline, create_histogram

        msg = 'Executor ID {} Creating timeline plots'.format(self.executor_id)
        logger.info(msg)
        if not self.log_level:
            print(msg)
            if self.data_cleaner:
                print()

        if self.rabbitmq_monitor and not futures:
            # With RabbitMQ monitoring, wait for all tracked futures first
            ftrs_to_plot = self.futures
            self.monitor(futures=ftrs_to_plot)
        else:
            ftrs_to_plot = [f for f in ftrs if f.ready or f.done]

        if not ftrs_to_plot:
            return

        run_statuses = [f.run_status for f in ftrs_to_plot]
        invoke_statuses = [f.invoke_status for f in ftrs_to_plot]

        if self.rabbitmq_monitor and invoke_statuses:
            for in_stat in invoke_statuses:
                # The statuses come straight from the futures, so the key may
                # be absent (e.g. already removed by a previous call); a plain
                # `del` would raise KeyError in that case.
                in_stat.pop('status_done_timestamp', None)

        create_timeline(dst_dir, dst_file_name, self.start_time, run_statuses,
                        invoke_statuses, self.config['ibm_cos'])
        create_histogram(dst_dir, dst_file_name, self.start_time, run_statuses,
                         self.config['ibm_cos'])

    def clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: When True (default) the cleaner is launched
            as a local background command through os.popen(); otherwise
            `clean_os_bucket` is invoked through the underlying executor.
        """
        storage_bucket = self.config['pywren']['storage_bucket']
        storage_prefix = self.config['pywren']['storage_prefix']
        # Object keys always use forward slashes, also on Windows
        storage_prefix = os.path.join(storage_prefix,
                                      self.executor_id).replace("\\", "/")

        msg = "Executor ID {} Cleaning partial results from cos://{}/{}".format(
            self.executor_id, storage_bucket, storage_prefix)
        logger.info(msg)
        if not self.log_level:
            print(msg)
            if not self.data_cleaner:
                print()

        if local_execution:
            # Execute in background as a separate command. The main program
            # does not wait for its completion.
            storage_config = json.dumps(
                self.internal_storage.get_storage_config())
            # Escape the double quotes: the JSON is embedded in a
            # double-quoted Python string literal inside the command
            storage_config = storage_config.replace('"', '\\"')

            cmdstr = (
                "{} -c 'from pywren_ibm_cloud.storage.cleaner import clean_bucket; \
                              clean_bucket(\"{}\", \"{}\", \"{}\")'".format(
                    sys.executable, storage_bucket, storage_prefix,
                    storage_config))
            os.popen(cmdstr)

        else:
            from contextlib import redirect_stdout

            extra_env = {'STORE_STATUS': False, 'STORE_RESULT': False}
            # Silence the invocation output. The context managers close the
            # devnull handle and restore the previous sys.stdout even if the
            # invocation raises.
            with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
                self.executor.call_async(clean_os_bucket,
                                         [storage_bucket, storage_prefix],
                                         extra_env=extra_env)

        self._state = ExecutorState.finished
Beispiel #6
0
class ibm_cf_executor:
    def __init__(self,
                 config=None,
                 runtime=None,
                 log_level=None,
                 runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
        """
        Build an executor and everything it needs to invoke functions.

        :param config: Settings handed to `wrenconfig.default()`. When None,
            `wrenconfig.default()` is called without arguments.
        :param runtime: When truthy, stored as config['ibm_cf']['action_name'].
        :param log_level: When truthy, passed to `wrenlogging.default_config()`.
        :param runtime_timeout: Passed on to the underlying Executor.
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        self.config = (wrenconfig.default() if config is None
                       else wrenconfig.default(config))

        # An explicit runtime overrides the configured action name
        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']
        self.cf_cluster = ibm_cf_config['is_cf_cluster']

        pywren_config = self.config['pywren']
        self.data_cleaner = pywren_config['data_cleaner']

        # Retry settings handed to the invoker
        retry_config = {
            'invocation_retry': pywren_config['invocation_retry'],
            'retry_sleeps': pywren_config['retry_sleeps'],
            'retries': pywren_config['retries'],
        }
        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config,
                                                    retry_config)

        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(self.storage_config)
        self.executor = Executor(invoker,
                                 self.config,
                                 self.internal_storage,
                                 runtime_timeout)
        self.executor_id = self.executor.executor_id

        self.futures = []
        self.reduce_future = None

    def call_async(self, func, data, extra_env=None, extra_meta=None):
        """
        Run a single function execution.

        :param func: the function to execute
        :param data: input data
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :return: The future of the invocation.
        :raises Exception: if the executor is in the finished or error state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception(
                'You cannot run pw.call_async() in the current state,'
                ' create a new pywren.ibm_cf_executor() instance.')

        # single_call() returns a sequence; only its first item is used
        invocation = self.executor.single_call(func, data, extra_env,
                                               extra_meta)
        single_future = invocation[0]
        self.futures.append(single_future)

        return single_future

    def map(self,
            map_function,
            map_iterdata,
            extra_env=None,
            extra_meta=None,
            remote_invocation=False,
            invoke_pool_threads=10,
            data_all_as_one=True,
            overwrite_invoke_args=None,
            exclude_modules=None):
        """
        Map a function over the input data.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param remote_invocation: Forwarded to the underlying executor.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: The list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is in the finished or error state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        call_options = dict(
            map_function=map_function,
            iterdata=map_iterdata,
            extra_env=extra_env,
            extra_meta=extra_meta,
            remote_invocation=remote_invocation,
            invoke_pool_threads=invoke_pool_threads,
            data_all_as_one=data_all_as_one,
            overwrite_invoke_args=overwrite_invoke_args,
            exclude_modules=exclude_modules)
        new_futures = self.executor.multiple_call(**call_options)
        self.futures.extend(new_futures)

        # A single invocation is handed back unwrapped
        return new_futures[0] if len(new_futures) == 1 else new_futures

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunk_size=None,
                   extra_env=None,
                   extra_meta=None,
                   remote_invocation=False,
                   reducer_one_per_object=False,
                   reducer_wait_local=True,
                   invoke_pool_threads=10,
                   data_all_as_one=True,
                   overwrite_invoke_args=None,
                   exclude_modules=None):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata: the input data to map over
        :param reduce_function: the function to reduce over the futures
        :param chunk_size: the size of the data chunks. 'None' for processing the whole file in one map
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param remote_invocation: Forwarded to the underlying executor.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: The list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is in the finished or error state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, map_data_list, bar)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception(
                'You cannot run pw.map_reduce() in the current state.'
                ' Create a new pywren.ibm_cf_executor() instance.')

        # Map and reduce are both delegated to one multiple_call()
        call_options = dict(
            reduce_function=reduce_function,
            obj_chunk_size=chunk_size,
            extra_env=extra_env,
            extra_meta=extra_meta,
            remote_invocation=remote_invocation,
            invoke_pool_threads=invoke_pool_threads,
            data_all_as_one=data_all_as_one,
            overwrite_invoke_args=overwrite_invoke_args,
            exclude_modules=exclude_modules,
            reducer_one_per_object=reducer_one_per_object,
            reducer_wait_local=reducer_wait_local)
        new_futures = self.executor.multiple_call(map_function,
                                                  map_iterdata,
                                                  **call_options)
        self.futures.extend(new_futures)

        # A single invocation is handed back unwrapped
        return new_futures[0] if len(new_futures) == 1 else new_futures

    def wait(self,
             futures=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             THREADPOOL_SIZE=16,
             WAIT_DUR_SEC=2):
        """
        Wait for futures to complete by delegating to the module-level
        `wait()` helper, whose return value is handed back unchanged.

        :param futures: Futures to wait for. Default None (use the futures
            tracked by this executor).
        :param throw_except: Forwarded to the `wait()` helper. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param THREADPOOL_SIZE: Number of threads to use. Default 16
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_dones, fs_notdones)`
            where `fs_dones` is a list of futures that have completed
            and `fs_notdones` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if there are no futures to wait for.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.wait()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        # Fall back to the futures tracked by this executor
        targets = futures or self.futures
        if not targets:
            raise Exception(
                'No activations to track. You must run pw.call_async(),'
                ' pw.map() or pw.map_reduce() before call pw.wait()')

        wait_options = dict(throw_except=throw_except,
                            return_when=return_when,
                            THREADPOOL_SIZE=THREADPOOL_SIZE,
                            WAIT_DUR_SEC=WAIT_DUR_SEC)
        return wait(targets, self.executor_id, self.internal_storage,
                    **wait_options)

    def get_result(self,
                   futures=None,
                   throw_except=True,
                   timeout=wrenconfig.CF_RUNTIME_TIMEOUT,
                   THREADPOOL_SIZE=64,
                   WAIT_DUR_SEC=2):
        """
        Wait for the activations to finish and return their results.

        :param futures: A future or a list of futures. Default None, in which
            case the futures tracked by this executor (`self.futures`) are used.
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Seconds to wait for the results before giving up.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check. Default 2
        :return: The list of results, the bare result when there is exactly
            one, or None when the timeout expired.
        :raises Exception: If there are no futures to get results from.
        :raises SystemExit: If the wait is interrupted with Ctrl-C.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> result = pw.get_result()
        """
        def _report(text):
            # Always debug-log; echo to stdout only when the logger sits at
            # WARNING level.
            logger.debug(text)
            if logger.getEffectiveLevel() == logging.WARNING:
                print(text)

        if futures:
            # Ensure futures is a list (a single future gets wrapped)
            ftrs = futures if isinstance(futures, list) else [futures]
        else:
            # In this case self.futures is always a list
            ftrs = self.futures

        if not ftrs:
            raise Exception('You must run pw.call_async(), pw.map()'
                            ' or pw.map_reduce() before call pw.get_result()')

        _report('Executor ID {} Getting results'.format(self.executor_id))

        # Arm a SIGALRM that fires after `timeout` seconds; the TimeoutError
        # branch below handles it.
        # NOTE(review): presumably timeout_handler raises TimeoutError - confirm.
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

        if self.cf_cluster or logger.getEffectiveLevel() != logging.WARNING:
            pbar = None
        else:
            import tqdm
            print()
            pbar = tqdm.tqdm(
                bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                total=len(ftrs),
                disable=False)

        try:
            wait(ftrs,
                 self.executor_id,
                 self.internal_storage,
                 throw_except=throw_except,
                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                 WAIT_DUR_SEC=WAIT_DUR_SEC,
                 pbar=pbar)
            # Only completed futures whose `futures` attribute is empty
            # contribute a result.
            result = [f.result() for f in ftrs if f.done and not f.futures]

        except TimeoutError:
            if pbar is not None:
                # Close the bar before printing the message; clearing the
                # reference keeps `finally` from closing it a second time.
                pbar.close()
                print()
                pbar = None
            not_dones_activation_ids = {
                f.activation_id for f in ftrs if not f.done
            }
            _report(
                'Executor ID {} Raised timeout of {} seconds getting results '
                '\nActivations not done: {}'.format(self.executor_id, timeout,
                                                    not_dones_activation_ids))
            self._state = ExecutorState.error
            result = None

        except KeyboardInterrupt:
            if pbar is not None:
                pbar.close()
                print()
                pbar = None
            not_dones_activation_ids = [
                f.activation_id for f in ftrs if not f.done
            ]
            _report('Executor ID {} Cancelled  \nActivations not done: {}'.format(
                self.executor_id, not_dones_activation_ids))
            # The cleanup is left to the `finally` clause, which also runs on
            # this path; cleaning here as well would launch the cleaner twice.
            # SystemExit is raised directly rather than through the
            # site-provided `exit()` helper, which is not always available.
            raise SystemExit

        finally:
            # Cancel any pending alarm whatever the outcome.
            signal.alarm(0)
            if pbar is not None:
                pbar.close()
                print()
            if self.data_cleaner and not self.cf_cluster:
                self.clean()

        msg = "Executor ID {} Finished\n".format(self.executor_id)
        logger.debug(msg)
        if (logger.getEffectiveLevel() == logging.WARNING
                and self.data_cleaner):
            print(msg)

        # A single result is returned unwrapped.
        if result and len(result) == 1:
            return result[0]
        return result

    def create_timeline_plots(self,
                              dst,
                              name,
                              run_statuses=None,
                              invoke_statuses=None):
        """
        Render the timeline and the histogram of an execution into dst.

        If neither `run_statuses` nor `invoke_statuses` is given, both are
        collected from the futures tracked by this executor.

        :param dst: destination folder to save .png plots.
        :param name: name of the file.
        :param run_statuses: run statuses timestamps.
        :param invoke_statuses: invocation statuses timestamps.
        :raises Exception: if no statuses were given and none could be
            collected from the executor's futures.
        """
        from pywren_ibm_cloud.plots import create_timeline, create_histogram

        # Fall back to the statuses recorded on the tracked futures.
        if self.futures and not (run_statuses or invoke_statuses):
            run_statuses = [fut.run_status for fut in self.futures]
            invoke_statuses = [fut.invoke_status for fut in self.futures]

        # Still nothing to plot: the caller has to supply the data.
        if not (run_statuses or invoke_statuses):
            raise Exception(
                'You must provide run_statuses and invoke_statuses')

        create_timeline(dst, name, run_statuses, invoke_statuses)
        create_histogram(dst, name, run_statuses, x_lim=150)

    def clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: If True (default) the cleaner is launched as a
            background Python subprocess on this machine and this method
            returns without waiting for it. If False the cleaner is submitted
            through the executor's `call_async`.
        """
        import subprocess

        storage_bucket = self.storage_config['storage_bucket']
        storage_prefix = os.path.join(self.storage_config['storage_prefix'],
                                      self.executor_id)

        msg = ("Executor ID {} Cleaning partial results from bucket '{}' "
               "and prefix '{}'".format(self.executor_id, storage_bucket,
                                        storage_prefix))
        logger.debug(msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(msg)
            if not self.data_cleaner:
                print()

        if local_execution:
            # Run the cleaner in the background so the main program does not
            # wait for it; a blocking clean can take a long time, which is
            # not ideal for performance tests.
            storage_config = json.dumps(
                self.internal_storage.get_storage_config())

            # The values travel as process arguments instead of being spliced
            # into a shell command line, so quotes, backslashes or shell
            # metacharacters inside them reach clean_bucket unchanged.
            cleaner_code = (
                'import sys; '
                'from pywren_ibm_cloud.storage.cleaner import clean_bucket; '
                'clean_bucket(sys.argv[1], sys.argv[2], sys.argv[3])')
            # stdout is discarded because nothing ever reads the child's
            # output.
            subprocess.Popen([sys.executable, '-c', cleaner_code,
                              str(storage_bucket), storage_prefix,
                              storage_config],
                             stdout=subprocess.DEVNULL)

        else:
            extra_env = {'STORE_STATUS': False, 'STORE_RESULT': False}
            # Silence stdout while the call is submitted, then put back
            # whatever stream was installed before (it may not be
            # sys.__stdout__), even if call_async raises.
            previous_stdout = sys.stdout
            with open(os.devnull, 'w') as devnull:
                sys.stdout = devnull
                try:
                    self.executor.call_async(clean_os_bucket,
                                             [storage_bucket, storage_prefix],
                                             extra_env=extra_env)
                finally:
                    sys.stdout = previous_stdout