def __init__(self, config=None, runtime=None, log_level=None,
             runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
    """
    Build an IBM Cloud Functions executor.

    :param config: Settings forwarded to `wrenconfig.default()`; when None
        the defaults are loaded without overrides. Default None.
    :param runtime: When truthy, stored as the 'action_name' of the
        'ibm_cf' configuration section. Default None.
    :param log_level: When truthy, passed to
        `wrenlogging.default_config()`. Default None.
    :param runtime_timeout: Value handed to the internal `Executor`.
        Default `wrenconfig.CF_RUNTIME_TIMEOUT`.
    :return `executor` object.

    Usage
      >>> import pywren_ibm_cloud as pywren
      >>> pw = pywren.ibm_cf_executor()
    """
    self._state = ExecutorState.new

    self.config = (wrenconfig.default() if config is None
                   else wrenconfig.default(config))

    if runtime:
        self.config['ibm_cf']['action_name'] = runtime
    if log_level:
        wrenlogging.default_config(log_level)

    cf_settings = self.config['ibm_cf']
    self.runtime = cf_settings['action_name']
    self.cf_cluster = cf_settings['is_cf_cluster']

    pywren_settings = self.config['pywren']
    self.data_cleaner = pywren_settings['data_cleaner']

    # The invoker receives only the retry-related subset of the settings
    retry_settings = {
        key: pywren_settings[key]
        for key in ('invocation_retry', 'retry_sleeps', 'retries')
    }
    invoker = invokers.IBMCloudFunctionsInvoker(cf_settings, retry_settings)

    self.storage_config = wrenconfig.extract_storage_config(self.config)
    self.internal_storage = storage.InternalStorage(self.storage_config)
    self.executor = Executor(invoker, self.config,
                             self.internal_storage, runtime_timeout)
    self.executor_id = self.executor.executor_id

    self.futures = []
    self.reduce_future = None
def __init__(self, config=None, runtime=None, log_level=None,
             job_max_runtime=JOB_MAX_RUNTIME):
    """
    Build an IBM Cloud Functions executor.

    :param config: Settings forwarded to `wrenconfig.default()`; when None
        the defaults are loaded without overrides. Default None.
    :param runtime: When truthy, stored as the 'action_name' of the
        'ibm_cf' configuration section. Default None.
    :param log_level: When truthy, passed to
        `wrenlogging.default_config()`. Default None.
    :param job_max_runtime: Value handed to the internal `Executor`.
        Default `JOB_MAX_RUNTIME`.
    :return `executor` object.

    Usage
      >>> import pywren
      >>> pw = pywren.ibm_cf_executor()
    """
    self._state = ExecutorState.new

    self.config = (wrenconfig.default() if config is None
                   else wrenconfig.default(config))

    if runtime:
        self.config['ibm_cf']['action_name'] = runtime
    if log_level:
        wrenlogging.default_config(log_level)

    # Any '__OW_'-prefixed environment variable marks an OpenWhisk execution
    self._openwhisk = any(name.startswith('__OW_') for name in os.environ)
    if self._openwhisk:
        wrenlogging.ow_config(logging.INFO)

    cf_settings = self.config['ibm_cf']
    self.runtime = cf_settings['action_name']
    invoker = invokers.IBMCloudFunctionsInvoker(cf_settings)

    self.storage_config = wrenconfig.extract_storage_config(self.config)
    self.storage_handler = storage.Storage(self.storage_config)
    self.executor = Executor(invoker, self.config,
                             self.storage_handler, job_max_runtime)
    self.executor_id = self.executor.executor_id

    self.futures = None
    self.reduce_future = None

    created_msg = 'IBM Cloud Functions executor created with ID {}'.format(
        self.executor_id)
    logger.info(created_msg)
    # At WARNING level the info record is not emitted, so print it instead
    if logger.getEffectiveLevel() == logging.WARNING:
        print(created_msg)
class ibm_cf_executor(object):
    """
    Client-side entry point for running functions on IBM Cloud Functions.

    An instance starts in `ExecutorState.new` and accepts a single
    `call_async()`, `map()` or `map_reduce()` call. Collecting the results
    with `get_result()` ends by calling `_clean()`, which leaves the
    instance in `ExecutorState.closed`.
    """

    def __init__(self, config=None, runtime=None, log_level=None,
                 job_max_runtime=JOB_MAX_RUNTIME):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param log_level: When set, it is passed to `wrenlogging.default_config()`. Default None.
        :param job_max_runtime: Max time per lambda. Default `JOB_MAX_RUNTIME`.
        :return `executor` object.

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        # Any '__OW_'-prefixed environment variable marks an OpenWhisk execution
        self._openwhisk = any(k.startswith('__OW_') for k in os.environ)
        if self._openwhisk:
            wrenlogging.ow_config(logging.INFO)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']
        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config)
        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.storage_handler = storage.Storage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.storage_handler,
                                 job_max_runtime)
        self.executor_id = self.executor.executor_id

        self.futures = None
        self.reduce_future = None

        self._notify('IBM Cloud Functions executor created with ID {}'.format(
            self.executor_id))

    @staticmethod
    def _notify(msg):
        """Log `msg` at info level and print it when the logger's effective level is WARNING."""
        logger.info(msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(msg)

    def call_async(self, func, data, extra_env=None, extra_meta=None):
        """
        For run one function execution

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :return: the future of the invocation
        :raises Exception: if the executor is not in the `new` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.call_async() in the current state,'
                            ' create a new pywren.ibm_cf_executor() instance.')

        self._state = ExecutorState.single_call
        self.futures = self.executor.call_async(func, data, extra_env, extra_meta)[0]
        return self.futures

    def map(self, func, iterdata, extra_env=None, extra_meta=None,
            remote_invocation=False, invoke_pool_threads=10,
            data_all_as_one=True, overwrite_invoke_args=None,
            exclude_modules=None):
        """
        :param func: the function to map over the data
        :param iterdata: An iterable of input data
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :param remote_invocation: When True and there is more than one input, the inputs are
                                  split in groups of 100 and each group is mapped by a new
                                  executor created inside a remote function. Default False.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: A list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is not in the `new` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        def remote_invoker(input_data):
            # Runs remotely: spawns the real map from inside a function
            pw = pywren.ibm_cf_executor()
            return pw.map(func, input_data)

        if not isinstance(iterdata, list):
            iterdata = list(iterdata)

        if len(iterdata) > 1 and remote_invocation:
            map_func = remote_invoker
            # One remote invoker per group of 100 inputs
            map_iterdata = [[iterdata[x:x+100]] for x in range(0, len(iterdata), 100)]
            invoke_pool_threads = 1
        else:
            remote_invocation = False
            map_func = func
            map_iterdata = iterdata

        self.futures = self.executor.map(func=map_func, iterdata=map_iterdata,
                                         extra_env=extra_env, extra_meta=extra_meta,
                                         invoke_pool_threads=invoke_pool_threads,
                                         data_all_as_one=data_all_as_one,
                                         overwrite_invoke_args=overwrite_invoke_args,
                                         exclude_modules=exclude_modules,
                                         original_func_name=func.__name__)

        if remote_invocation:
            self._notify('Executor ID {} Getting remote invocations'.format(
                self.executor_id))

            def fetch_future_results(f):
                f.result(storage_handler=self.storage_handler)
                return f

            pool = ThreadPool(32)
            try:
                pool.map(fetch_future_results, self.futures)
            finally:
                # Release the worker threads even if a fetch raised
                pool.close()
                pool.join()

            # Each remote invoker returns the list of futures it created
            new_futures = [f.result() for f in self.futures if f.done]
            self.futures = []
            for futures_list in new_futures:
                self.futures.extend(futures_list)

        self._state = ExecutorState.map
        if isinstance(self.futures, list) and len(self.futures) == 1:
            self.futures = self.futures[0]
            self._state = ExecutorState.single_call

        return self.futures

    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   chunk_size=64*1024**2, reducer_one_per_object=False,
                   reducer_wait_local=True, throw_except=True,
                   extra_env=None, extra_meta=None):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata: the data the map_function is mapped over
        :param reduce_function: the function to reduce over the futures
        :param chunk_size: the size of the data chunks
        :param reducer_one_per_object: forwarded to `Executor.map_reduce()`. Default False.
        :param reducer_wait_local: forwarded to `Executor.map_reduce()`. Default True.
        :param throw_except: forwarded to `Executor.map_reduce()`. Default True.
        :param extra_env: Additional environment variables for lambda environment. Default None.
        :param extra_meta: Additional metadata to pass to lambda. Default None.
        :return: A list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is not in the `new` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, data_list, bar)
        """
        if self._state != ExecutorState.new:
            raise Exception('You cannot run pw.map_reduce() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        self.futures = self.executor.map_reduce(map_function, map_iterdata,
                                                reduce_function, chunk_size,
                                                reducer_one_per_object,
                                                reducer_wait_local,
                                                throw_except, extra_env,
                                                extra_meta)

        self._state = ExecutorState.map_reduce
        if isinstance(self.futures, list) and len(self.futures) == 1:
            self.futures = self.futures[0]
            self._state = ExecutorState.single_call
        if not isinstance(self.futures, list):
            self._state = ExecutorState.single_call

        return self.futures

    def wait(self, throw_except=True, verbose=True, return_when=ALL_COMPLETED,
             THREADPOOL_SIZE=64, WAIT_DUR_SEC=4):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.

        :param throw_except: forwarded to the module-level `wait()`. Default True.
        :param verbose: forwarded to the module-level `wait()`. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_dones, fs_notdones)`
            where `fs_dones` is a list of futures that have completed
            and `fs_notdones` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if neither `map()` nor `map_reduce()` left the
            executor in the `map` / `map_reduce` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.wait()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        # The previous `not a or not b` guard was true for every state, so
        # this method could never get past the exception.
        if self._state not in (ExecutorState.map, ExecutorState.map_reduce):
            raise Exception('You must run pw.map() or pw.map_reduce() before call pw.wait()')

        # `wait` here resolves to the module-level function, not this method
        return wait(self.futures, self.executor_id, self.storage_handler,
                    throw_except, verbose, return_when, THREADPOOL_SIZE,
                    WAIT_DUR_SEC)

    def get_result(self, throw_except=True, verbose=False, timeout=JOB_MAX_RUNTIME):
        """
        For get PyWren results

        :param throw_except: Reraise exception if call raised. Default true.
        :param verbose: Shows some information prints.
        :param timeout: Seconds used to arm the SIGALRM timer while waiting.
        :return: The result of the future/s

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.call_async(foo, data)
          >>> result = pw.get_result()
        """
        if self._state == ExecutorState.single_call:
            return self._get_result(throw_except=throw_except,
                                    verbose=verbose, timeout=timeout)
        return self._get_all_results(throw_except=throw_except,
                                     verbose=verbose, timeout=timeout)

    def _get_result(self, throw_except=True, verbose=False, timeout=JOB_MAX_RUNTIME):
        """
        For get one function execution (future) result

        :param throw_except: Reraise exception if call raised. Default true.
        :param verbose: Shows some information prints.
        :param timeout: Seconds used to arm the SIGALRM timer; it is re-armed
            after every `result()` call.
        :return: The result of the call_async future, or None after a timeout
        :raises Exception: if the executor is still in the `new` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.call_async(foo, data)
          >>> result = pw._get_result()
        """
        if self._state == ExecutorState.new:
            raise Exception('You must run pw.call_async(), or pw.map() '
                            'or pw.map_reduce() before call pw.get_result()')

        self._notify('Executor ID {} Getting result'.format(self.executor_id))

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

        # Bound up front so neither the handlers nor the final return can
        # hit an unbound local when something fails early.
        pbar = None
        result = None
        try:
            if not verbose:
                import tqdm
                print()
                pbar = tqdm.tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                                 total=1, disable=False)

            # Ask for the result at least once, so a future that is already
            # done still yields its value instead of leaving `result` unset.
            while True:
                result = self.futures.result(storage_handler=self.storage_handler,
                                             throw_except=throw_except,
                                             verbose=verbose)
                signal.alarm(timeout)
                if self.futures.done:
                    break

            if pbar is not None:
                pbar.update(1)
                pbar.close()
                print()

            self._state = ExecutorState.success

        except (TimeoutError, IndexError):
            if pbar is not None:
                pbar.close()
                print()
            self._notify('Executor ID {} Raised timeout of {} seconds getting the '
                         'result from Activation ID {}'.format(self.executor_id,
                                                               timeout,
                                                               self.futures.activation_id))
            self._state = ExecutorState.error
            result = None

        except KeyboardInterrupt:
            if pbar is not None:
                pbar.close()
                print()
            self._notify('Executor ID {} Cancelled'.format(self.executor_id))
            raise SystemExit

        finally:
            signal.alarm(0)
            if pbar is not None:
                pbar.close()
            self._clean()
            print()

        return result

    def _get_all_results(self, throw_except=True, verbose=False,
                         timeout=JOB_MAX_RUNTIME, THREADPOOL_SIZE=64,
                         WAIT_DUR_SEC=3):
        """
        Take in a list of futures, call result on each one individually
        by using a threadpool, and return those results. Useful to fetch
        the results as they are produced.

        :param throw_except: Reraise exception if call raised. Default True.
        :param verbose: Show results (True) or progress bar (False). Default False.
        :param timeout: Seconds used to arm the SIGALRM timer.
        :param THREADPOOL_SIZE: Number of threads used to fetch results. Default 64
        :param WAIT_DUR_SEC: Base interval between checks; the actual sleep
            shrinks proportionally to the fraction of calls already done.
        :return: A list of the results of each futures
        :rtype: list
        :raises Exception: if the executor is still in the `new` state

        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> results = pw._get_all_results()
        """
        if self._state == ExecutorState.new:
            raise Exception('You must run pw.map() or pw.map_reduce() '
                            'before call pw.get_all_results()')

        self._notify('Executor ID {} Getting results'.format(self.executor_id))

        def timeout_handler(signum, frame):
            raise TimeoutError()

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

        # Bound up front so the handlers and `finally` can always test them
        pbar = None
        pool = None
        try:
            pool = ThreadPool(THREADPOOL_SIZE)

            def fetch_future_results(f):
                f.result(storage_handler=self.storage_handler,
                         throw_except=throw_except, verbose=verbose)
                return f

            N = len(self.futures)
            if not verbose:
                import tqdm
                print()
                pbar = tqdm.tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                                 total=N, disable=False)

            callids_done_in_callset = set()
            call_ids = set()

            while len(callids_done_in_callset) < N:
                delay = WAIT_DUR_SEC-((len(callids_done_in_callset)/N)*WAIT_DUR_SEC)
                time.sleep(delay)

                current_call_ids = {(f.callgroup_id, f.call_id) for f in self.futures}
                call_ids = set(self.storage_handler.get_callset_status(self.executor_id))
                call_ids_to_check = call_ids.intersection(current_call_ids)

                not_done_call_ids = call_ids_to_check.difference(callids_done_in_callset)
                still_not_done_futures = [f for f in self.futures
                                          if (f.callgroup_id, f.call_id) in not_done_call_ids]

                if verbose and still_not_done_futures:
                    pool.map(fetch_future_results, still_not_done_futures)
                elif still_not_done_futures:
                    fetched = pool.map(fetch_future_results, still_not_done_futures)
                    for f in fetched:
                        if f.done:
                            pbar.update(1)
                    pbar.refresh()

                callids_done_in_callset.update((f.callgroup_id, f.call_id)
                                               for f in still_not_done_futures
                                               if f.done)

            if pbar is not None:
                pbar.close()
                print()
            self._state = ExecutorState.success

        except (TimeoutError, IndexError):
            if pbar is not None:
                pbar.close()
                print()
            not_dones_activation_ids = {f.activation_id for f in self.futures if not f.done}
            self._notify('Executor ID {} Raised timeout of {} seconds getting results '
                         '\nActivations not done: {}'.format(self.executor_id,
                                                             timeout,
                                                             not_dones_activation_ids))
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            if pbar is not None:
                pbar.close()
                print()
            not_dones_activation_ids = [f.activation_id for f in self.futures if not f.done]
            self._notify('Executor ID {} Cancelled \nActivations not done: {}'.format(
                self.executor_id, not_dones_activation_ids))
            raise SystemExit

        finally:
            if pbar is not None:
                pbar.close()
            if pool is not None:
                # Closed on every path, not only on success
                pool.close()
            signal.alarm(0)
            self._clean()
            print()

        results = [f.result(throw_except=throw_except) for f in self.futures if f.done]
        return results

    def _clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: When True the bucket is cleaned by calling
            `clean_bucket()` in this process; otherwise `clean_os_bucket` is
            invoked through the executor with stdout silenced. Default True.
        """
        import contextlib

        storage_bucket = self.storage_config['storage_bucket']
        storage_prefix = os.path.join(self.storage_config['storage_prefix'],
                                      self.executor_id)

        self._notify("Executor ID {} Cleaning partial results from PyWren bucket '{}'".format(
            self.executor_id, storage_bucket))

        if local_execution:
            clean_bucket(storage_bucket, storage_prefix, self.storage_config)
        else:
            extra_env = {'NOT_STORE_RESULTS': 'True'}
            # Silence the invocation output; the previous stdout is restored
            # and the devnull handle closed even if the call raises.
            with open(os.devnull, 'w') as devnull, \
                    contextlib.redirect_stdout(devnull):
                self.executor.call_async(clean_os_bucket,
                                         [storage_bucket, storage_prefix],
                                         extra_env=extra_env)

        self._state = ExecutorState.closed
        self._notify("Executor ID {} Finished".format(self.executor_id))
def __init__(self, config=None, runtime=None, runtime_memory=None,
             log_level=None, rabbitmq_monitor=False):
    """
    Build an IBM Cloud Functions executor.

    :param config: Settings forwarded to `wrenconfig.default()`; when None
        the defaults are loaded without overrides. Default None.
    :param runtime: When truthy, overrides the 'runtime' entry of the
        'pywren' configuration section. Default None.
    :param runtime_memory: When truthy, overrides the 'runtime_memory'
        entry (converted with `int`). Default None.
    :param log_level: log level to use during the execution. When not
        given, the logger's effective level name is used unless that level
        is WARNING.
    :param rabbitmq_monitor: use rabbitmq as monitoring system; it is
        switched off again when no 'amqp_url' is configured.
    :return `executor` object.

    Usage
      >>> import pywren_ibm_cloud as pywren
      >>> pw = pywren.ibm_cf_executor()
    """
    self.start_time = time.time()
    self._state = ExecutorState.new

    self.config = (wrenconfig.default() if config is None
                   else wrenconfig.default(config))

    self.is_cf_cluster = is_cf_cluster()
    pywren_settings = self.config['pywren']
    self.data_cleaner = pywren_settings['data_cleaner']

    # Runtime overrides
    if runtime:
        pywren_settings['runtime'] = runtime
    if runtime_memory:
        pywren_settings['runtime_memory'] = int(runtime_memory)

    # Log level: fall back to the logger's own level unless it is WARNING
    self.log_level = log_level
    if not self.log_level and logger.getEffectiveLevel() != logging.WARNING:
        self.log_level = logging.getLevelName(logger.getEffectiveLevel())
    if self.log_level:
        os.environ["PYWREN_LOG_LEVEL"] = self.log_level
        if not self.is_cf_cluster:
            wrenlogging.default_config(self.log_level)

    # RabbitMQ monitoring: only usable when an AMQP url is configured
    self.rabbitmq_monitor = rabbitmq_monitor
    rabbit_settings = self.config['rabbitmq']
    if not self.rabbitmq_monitor:
        rabbit_settings['amqp_url'] = None
    elif rabbit_settings['amqp_url']:
        os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
    else:
        self.rabbitmq_monitor = False

    internal_storage_config = wrenconfig.extract_storage_config(self.config)
    self.internal_storage = storage.InternalStorage(internal_storage_config)
    cf_invoker = invokers.IBMCloudFunctionsInvoker(self.config)
    self.executor = Executor(cf_invoker, self.config, self.internal_storage)
    self.executor_id = self.executor.executor_id

    self.futures = []
class ibm_cf_executor:
    """
    Client-side entry point for running functions on IBM Cloud Functions.

    Futures created by `call_async()`, `map()` and `map_reduce()` are
    accumulated in `self.futures`; `monitor()` / `get_result()` wait for
    them, and `clean()` leaves the instance in `ExecutorState.finished`,
    after which no new invocation is accepted.
    """

    def __init__(self, config=None, runtime=None, runtime_memory=None,
                 log_level=None, rabbitmq_monitor=False):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_memory: memory to use in the runtime
        :param log_level: log level to use during the execution
        :param rabbitmq_monitor: use rabbitmq as monitoring system
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self.start_time = time.time()
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        self.is_cf_cluster = is_cf_cluster()
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Overwrite runtime variables
        if runtime:
            self.config['pywren']['runtime'] = runtime
        if runtime_memory:
            self.config['pywren']['runtime_memory'] = int(runtime_memory)

        # Log level Configuration: fall back to the logger's own level
        # unless that level is WARNING
        self.log_level = log_level
        if not self.log_level:
            if logger.getEffectiveLevel() != logging.WARNING:
                self.log_level = logging.getLevelName(
                    logger.getEffectiveLevel())
        if self.log_level:
            os.environ["PYWREN_LOG_LEVEL"] = self.log_level
            if not self.is_cf_cluster:
                wrenlogging.default_config(self.log_level)

        # RabbitMQ monitor configuration: disabled when no AMQP url is set
        self.rabbitmq_monitor = rabbitmq_monitor
        if self.rabbitmq_monitor:
            if self.config['rabbitmq']['amqp_url']:
                os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
            else:
                self.rabbitmq_monitor = False
        else:
            self.config['rabbitmq']['amqp_url'] = None

        storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(storage_config)
        invoker = invokers.IBMCloudFunctionsInvoker(self.config)
        self.executor = Executor(invoker, self.config, self.internal_storage)
        self.executor_id = self.executor.executor_id

        self.futures = []

    def call_async(self, func, data, extra_env=None, extra_meta=None,
                   timeout=wrenconfig.RUNTIME_TIMEOUT):
        """
        For run one function execution

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param timeout: forwarded to `Executor.call_async()`.
        :return: the future of the invocation
        :raises Exception: if the executor is in the `finished` state

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state == ExecutorState.finished:
            raise Exception(
                'You cannot run pw.call_async() in the current state,'
                ' create a new pywren.ibm_cf_executor() instance.')

        future = self.executor.call_async(func, data, extra_env, extra_meta,
                                          timeout)[0]
        self.futures.append(future)
        self._state = ExecutorState.running

        return future

    def map(self, map_function, map_iterdata, extra_env=None, extra_meta=None,
            chunk_size=None, remote_invocation=False,
            timeout=wrenconfig.RUNTIME_TIMEOUT, remote_invocation_groups=None,
            invoke_pool_threads=500, data_all_as_one=True,
            overwrite_invoke_args=None, exclude_modules=None):
        """
        :param map_function: the function to map over the data
        :param map_iterdata: A sized collection of input data (`len()` is taken)
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param chunk_size: the size of the data chunks. 'None' for processing the whole file in one map
        :param remote_invocation: Request remote invocation. Ignored when there is a single
                                  input or when running inside a CF cluster. Default False.
        :param timeout: forwarded to `Executor.map()` as `job_max_runtime`.
        :param remote_invocation_groups: forwarded to `Executor.map()`. Default None.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: A list of futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is in the `finished` state

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state == ExecutorState.finished:
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        inv_action_name = self.executor.invoker.action_name

        if len(map_iterdata) == 1 or self.is_cf_cluster:
            # Ensure no remote invocation in these particular cases
            remote_invocation = False

        if remote_invocation:
            ria_memory = wrenconfig.RUNTIME_RI_MEMORY_DEFAULT
            self.executor.invoker.action_name = create_ri_action_name(
                inv_action_name, ria_memory)

        try:
            map_futures, unused_ppo = self.executor.map(
                map_function=map_function,
                iterdata=map_iterdata,
                obj_chunk_size=chunk_size,
                extra_env=extra_env,
                extra_meta=extra_meta,
                remote_invocation=remote_invocation,
                remote_invocation_groups=remote_invocation_groups,
                invoke_pool_threads=invoke_pool_threads,
                data_all_as_one=data_all_as_one,
                overwrite_invoke_args=overwrite_invoke_args,
                exclude_modules=exclude_modules,
                job_max_runtime=timeout)
        finally:
            # Put the regular action name back even when the map fails, so
            # later invocations do not target the remote-invoker action.
            self.executor.invoker.action_name = inv_action_name

        self.futures.extend(map_futures)
        self._state = ExecutorState.running

        if len(map_futures) == 1:
            return map_futures[0]
        return map_futures

    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   extra_env=None, extra_meta=None, chunk_size=None,
                   remote_invocation=False, remote_invocation_groups=None,
                   timeout=wrenconfig.RUNTIME_TIMEOUT,
                   reducer_one_per_object=False, reducer_wait_local=False,
                   invoke_pool_threads=500, data_all_as_one=True,
                   overwrite_invoke_args=None, exclude_modules=None):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata: A sized collection of input data (`len()` is taken)
        :param reduce_function: the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_meta: Additional metadata to pass to action. Default None.
        :param chunk_size: the size of the data chunks. 'None' for processing the whole file in one map
        :param remote_invocation: Request remote invocation. Ignored when there is a single
                                  input or when running inside a CF cluster. Default False.
        :param remote_invocation_groups: forwarded to `Executor.map()`. Default None.
        :param timeout: forwarded to `Executor.map()` as `job_max_runtime`.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.
        :return: A list of reduce futures, or the future itself when there is exactly one.
        :raises Exception: if the executor is in the `finished` state

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, map_data_list, bar)
        """
        if self._state == ExecutorState.finished:
            raise Exception(
                'You cannot run pw.map_reduce() in the current state.'
                ' Create a new pywren.ibm_cf_executor() instance.')

        inv_action_name = self.executor.invoker.action_name

        if len(map_iterdata) == 1 or self.is_cf_cluster:
            # Ensure no remote invocation in these particular cases
            remote_invocation = False

        if remote_invocation:
            ria_memory = wrenconfig.RUNTIME_RI_MEMORY_DEFAULT
            self.executor.invoker.action_name = create_ri_action_name(
                inv_action_name, ria_memory)

        try:
            map_futures, parts_per_object = self.executor.map(
                map_function,
                map_iterdata,
                extra_env=extra_env,
                extra_meta=extra_meta,
                obj_chunk_size=chunk_size,
                remote_invocation=remote_invocation,
                remote_invocation_groups=remote_invocation_groups,
                invoke_pool_threads=invoke_pool_threads,
                data_all_as_one=data_all_as_one,
                overwrite_invoke_args=overwrite_invoke_args,
                exclude_modules=exclude_modules,
                job_max_runtime=timeout)

            self._state = ExecutorState.running
            if reducer_wait_local:
                self.monitor(futures=map_futures)
        finally:
            # The reducers must run on the regular action; restore the name
            # on failure as well so the invoker is never left swapped.
            self.executor.invoker.action_name = inv_action_name

        futures = self.executor.reduce(reduce_function, map_futures,
                                       parts_per_object,
                                       reducer_one_per_object, extra_env,
                                       extra_meta)
        self.futures.extend(futures)

        if len(futures) == 1:
            return futures[0]
        return futures

    def monitor(self, futures=None, throw_except=True,
                return_when=ALL_COMPLETED, download_results=False,
                timeout=wrenconfig.RUNTIME_TIMEOUT, THREADPOOL_SIZE=128,
                WAIT_DUR_SEC=1):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.

        A timeout or a keyboard interrupt does not propagate: it is
        reported, the state is set to `error` while cleaning up, and the
        futures are still partitioned and returned.

        :param futures: Futures list (or a single future). Default None, meaning `self.futures`
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only download statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if there are no futures to wait for

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.monitor()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        if futures:
            # Ensure futures is a list
            ftrs = futures if isinstance(futures, list) else [futures]
        else:
            # In this case self.futures is always a list
            ftrs = self.futures

        if not ftrs:
            raise Exception('You must run pw.call_async(), pw.map()'
                            ' or pw.map_reduce() before call pw.get_result()')

        rabbit_amqp_url = None
        if self._state == ExecutorState.running:
            if self.rabbitmq_monitor:
                rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            if rabbit_amqp_url and not download_results:
                logger.info(
                    'Going to use RabbitMQ to monitor function activations')
            if download_results:
                msg = 'Executor ID {} Getting results...'.format(
                    self.executor_id)
            else:
                msg = 'Executor ID {} Waiting for functions to complete...'.format(
                    self.executor_id)
            logger.info(msg)
            if not self.log_level and self._state == ExecutorState.running:
                print(msg)

        if is_unix_system():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        pbar = None
        if not self.is_cf_cluster and self._state == ExecutorState.running \
                and not self.log_level and not is_notebook():
            import tqdm
            print()
            pbar = tqdm.tqdm(
                bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                total=len(ftrs),
                disable=False)

        try:
            wait(ftrs,
                 self.executor_id,
                 self.internal_storage,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 rabbit_amqp_url=rabbit_amqp_url,
                 pbar=pbar,
                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                 WAIT_DUR_SEC=WAIT_DUR_SEC)

        except TimeoutError:
            if download_results:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.done
                ]
            else:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.ready
                ]
            msg = (
                'Executor ID {} Raised timeout of {} seconds waiting for results '
                '\nActivations not done: {}'.format(self.executor_id, timeout,
                                                    not_dones_activation_ids))
            self._state = ExecutorState.error

        except KeyboardInterrupt:
            if download_results:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.done
                ]
            else:
                not_dones_activation_ids = [
                    f.activation_id for f in ftrs if not f.ready
                ]
            msg = 'Executor ID {} Cancelled \nActivations not done: {}'.format(
                self.executor_id, not_dones_activation_ids)
            self._state = ExecutorState.error

        finally:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                print()
            if self._state == ExecutorState.error:
                # `msg` was rebound by the handler that set the error state
                logger.info(msg)
                if not self.log_level:
                    print(msg)
            if self.data_cleaner and not self.is_cf_cluster \
                    and self._state != ExecutorState.ready:
                self.clean()

        if download_results:
            fs_dones = [f for f in ftrs if f.done]
            fs_notdones = [f for f in ftrs if not f.done]
        else:
            fs_dones = [f for f in ftrs if f.ready]
            fs_notdones = [f for f in ftrs if not f.ready]

        self._state = ExecutorState.ready

        return fs_dones, fs_notdones

    def get_result(self, futures=None, throw_except=True,
                   timeout=wrenconfig.RUNTIME_TIMEOUT, THREADPOOL_SIZE=64,
                   WAIT_DUR_SEC=1):
        """
        For getting PyWren results

        :param futures: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: The result of the future/s: a list, or the bare value when
            exactly one result was collected.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> results = pw.get_result()
        """
        fs_dones, unused_fs_notdones = self.monitor(
            futures=futures,
            throw_except=throw_except,
            timeout=timeout,
            download_results=True,
            THREADPOOL_SIZE=THREADPOOL_SIZE,
            WAIT_DUR_SEC=WAIT_DUR_SEC)

        # Futures carrying a non-empty `futures` attribute are left out
        result = [f.result() for f in fs_dones if f.done and not f.futures]

        msg = "Executor ID {} Finished getting results".format(
            self.executor_id)
        logger.info(msg)
        if not self.log_level:
            print(msg)

        if len(result) == 1:
            return result[0]
        return result

    def create_timeline_plots(self, dst_dir, dst_file_name, futures=None):
        """
        Creates timeline and histogram of the current execution in dst_dir.

        :param dst_dir: destination folder to save .png plots.
        :param dst_file_name: name of the file.
        :param futures: Futures to plot. Default None, meaning `self.futures`
        :raises Exception: if there are no futures or the executor is still `new`
        """
        ftrs = futures if futures else self.futures

        if not ftrs or self._state == ExecutorState.new:
            raise Exception(
                'You must run pw.call_async(), pw.map() or pw.map_reduce()'
                ' before call pw.create_timeline_plots()')

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from pywren_ibm_cloud.plots import create_timeline, create_histogram

        msg = 'Executor ID {} Creating timeline plots'.format(self.executor_id)
        logger.info(msg)
        if not self.log_level:
            print(msg)
            if self.data_cleaner:
                print()

        if self.rabbitmq_monitor and not futures:
            ftrs_to_plot = self.futures
            self.monitor(futures=ftrs_to_plot)
        else:
            ftrs_to_plot = [f for f in ftrs if f.ready or f.done]

        if not ftrs_to_plot:
            return

        run_statuses = [f.run_status for f in ftrs_to_plot]
        invoke_statuses = [f.invoke_status for f in ftrs_to_plot]

        if self.rabbitmq_monitor and invoke_statuses:
            for in_stat in invoke_statuses:
                # pop() instead of del: the entry is removed from the
                # future's own dict, so a second plot over the same futures
                # would otherwise fail with KeyError.
                in_stat.pop('status_done_timestamp', None)

        create_timeline(dst_dir, dst_file_name, self.start_time, run_statuses,
                        invoke_statuses, self.config['ibm_cos'])
        create_histogram(dst_dir, dst_file_name, self.start_time,
                         run_statuses, self.config['ibm_cos'])

    def clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: When True the cleaner is launched as a
            background subprocess of this machine; otherwise `clean_os_bucket`
            is invoked through the executor with stdout silenced. Default True.
        """
        import contextlib

        storage_bucket = self.config['pywren']['storage_bucket']
        storage_prefix = self.config['pywren']['storage_prefix']
        # Object keys always use '/', whatever os.path.join produced
        storage_prefix = os.path.join(storage_prefix,
                                      self.executor_id).replace("\\", "/")

        msg = "Executor ID {} Cleaning partial results from cos://{}/{}".format(
            self.executor_id, storage_bucket, storage_prefix)
        logger.info(msg)
        if not self.log_level:
            print(msg)
            if not self.data_cleaner:
                print()

        if local_execution:
            # Executed in background as a subprocess: the main program does
            # not wait for its completion (waiting can take a long time).
            storage_config = json.dumps(
                self.internal_storage.get_storage_config())
            storage_config = storage_config.replace('"', '\\"')

            # NOTE(review): this is a shell command line assembled from
            # configuration values; confirm those values are trusted.
            cmdstr = (
                "{} -c 'from pywren_ibm_cloud.storage.cleaner import clean_bucket; "
                "clean_bucket(\"{}\", \"{}\", \"{}\")'".format(
                    sys.executable, storage_bucket, storage_prefix,
                    storage_config))
            os.popen(cmdstr)
        else:
            extra_env = {'STORE_STATUS': False, 'STORE_RESULT': False}
            # Silence the invocation output; the previous stdout is restored
            # and the devnull handle closed even if the call raises.
            with open(os.devnull, 'w') as devnull, \
                    contextlib.redirect_stdout(devnull):
                self.executor.call_async(clean_os_bucket,
                                         [storage_bucket, storage_prefix],
                                         extra_env=extra_env)

        self._state = ExecutorState.finished
class ibm_cf_executor:
    """
    Front-end object used to run functions through IBM Cloud Functions.

    It builds the configuration, the invoker and the internal storage,
    delegates invocations to an `Executor` instance and keeps track of every
    future it created in `self.futures`.
    """

    def __init__(self, config=None, runtime=None, log_level=None,
                 runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in
            `pywren_config`. Default None.
        :param runtime: Runtime name to use. When given it replaces the
            configured `action_name`. Default None.
        :param log_level: When truthy, it is handed to
            `wrenlogging.default_config`. Default None.
        :param runtime_timeout: Max time per action.
            Defaults to `wrenconfig.CF_RUNTIME_TIMEOUT`.
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']
        self.cf_cluster = ibm_cf_config['is_cf_cluster']
        self.data_cleaner = self.config['pywren']['data_cleaner']

        pywren_config = self.config['pywren']
        retry_config = {
            'invocation_retry': pywren_config['invocation_retry'],
            'retry_sleeps': pywren_config['retry_sleeps'],
            'retries': pywren_config['retries'],
        }

        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config,
                                                    retry_config)

        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.internal_storage,
                                 runtime_timeout)
        self.executor_id = self.executor.executor_id

        self.futures = []
        self.reduce_future = None

    def _log_and_print(self, msg):
        """
        Send `msg` to the debug log and, when the effective log level is
        WARNING, also echo it on stdout.
        """
        logger.debug(msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(msg)

    def call_async(self, func, data, extra_env=None, extra_meta=None):
        """
        For run one function execution

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional environment variables for action
            environment. Default None.
        :param extra_meta: Additional metadata to pass to action.
            Default None.
        :return: the future of the invocation.
        :raises Exception: if the executor is in the `finished` or `error`
            state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> future = pw.call_async(foo, data)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception(
                'You cannot run pw.call_async() in the current state,'
                ' create a new pywren.ibm_cf_executor() instance.')

        # single_call returns a sequence; only its first element is used
        future = self.executor.single_call(func, data, extra_env,
                                           extra_meta)[0]
        self.futures.append(future)

        return future

    def map(self, map_function, map_iterdata, extra_env=None, extra_meta=None,
            remote_invocation=False, invoke_pool_threads=10,
            data_all_as_one=True, overwrite_invoke_args=None,
            exclude_modules=None):
        """
        Run `map_function` over every element of `map_iterdata`.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param extra_env: Additional environment variables for action
            environment. Default None.
        :param extra_meta: Additional metadata to pass to action.
            Default None.
        :param remote_invocation: Forwarded unchanged to the executor's
            `multiple_call`. Default False.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object.
            Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for
            testing.
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies.
        :return: The list of futures created by the executor, or the future
            itself when exactly one was created.
        :raises Exception: if the executor is in the `finished` or `error`
            state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> futures = pw.map(foo, data_list)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception('You cannot run pw.map() in the current state.'
                            ' Create a new pywren.ibm_cf_executor() instance.')

        futures = self.executor.multiple_call(
            map_function=map_function,
            iterdata=map_iterdata,
            extra_env=extra_env,
            extra_meta=extra_meta,
            remote_invocation=remote_invocation,
            invoke_pool_threads=invoke_pool_threads,
            data_all_as_one=data_all_as_one,
            overwrite_invoke_args=overwrite_invoke_args,
            exclude_modules=exclude_modules)
        self.futures.extend(futures)

        if len(futures) == 1:
            return futures[0]
        return futures

    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   chunk_size=None, extra_env=None, extra_meta=None,
                   remote_invocation=False, reducer_one_per_object=False,
                   reducer_wait_local=True, invoke_pool_threads=10,
                   data_all_as_one=True, overwrite_invoke_args=None,
                   exclude_modules=None):
        """
        Map the map_function over the data and apply the reduce_function
        across all futures. This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: the function to reduce over the futures
        :param chunk_size: the size of the data chunks. 'None' for processing
            the whole file in one map
        :param extra_env: Additional environment variables for action
            environment. Default None.
        :param extra_meta: Additional metadata to pass to action.
            Default None.
        :param remote_invocation: Forwarded unchanged to the executor's
            `multiple_call`. Default False.
        :param reducer_one_per_object: Set one reducer per object after
            running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object.
            Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for
            testing.
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies.
        :return: The list of futures created by the executor, or the future
            itself when exactly one was created.
        :raises Exception: if the executor is in the `finished` or `error`
            state.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map_reduce(foo, map_data_list, bar)
        """
        if self._state in (ExecutorState.finished, ExecutorState.error):
            raise Exception(
                'You cannot run pw.map_reduce() in the current state.'
                ' Create a new pywren.ibm_cf_executor() instance.')

        futures = self.executor.multiple_call(
            map_function,
            map_iterdata,
            reduce_function=reduce_function,
            obj_chunk_size=chunk_size,
            extra_env=extra_env,
            extra_meta=extra_meta,
            remote_invocation=remote_invocation,
            invoke_pool_threads=invoke_pool_threads,
            data_all_as_one=data_all_as_one,
            overwrite_invoke_args=overwrite_invoke_args,
            exclude_modules=exclude_modules,
            reducer_one_per_object=reducer_one_per_object,
            reducer_wait_local=reducer_wait_local)
        self.futures.extend(futures)

        if len(futures) == 1:
            return futures[0]
        return futures

    def wait(self, futures=None, throw_except=True, return_when=ALL_COMPLETED,
             THREADPOOL_SIZE=16, WAIT_DUR_SEC=2):
        """
        Wait for the Future instances `fs` to complete. Returns a 2-tuple of
        lists. The first list contains the futures that completed
        (finished or cancelled) before the wait completed. The second
        contains uncompleted futures.

        :param futures: Futures list. When empty or None, the futures
            tracked by this executor are used. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param THREADPOOL_SIZE: Number of threads to use. Default 16
        :param WAIT_DUR_SEC: Time interval between each check. Default 2
        :return: `(fs_dones, fs_notdones)`
            where `fs_dones` is a list of futures that have completed
            and `fs_notdones` is a list of futures that have not completed.
        :rtype: 2-tuple of lists
        :raises Exception: if there are no futures to track.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data_list)
          >>> dones, not_dones = pw.wait()
          >>> # not_dones should be an empty list.
          >>> results = [f.result() for f in dones]
        """
        if not futures:
            futures = self.futures

        if not futures:
            raise Exception(
                'No activations to track. You must run pw.call_async(),'
                ' pw.map() or pw.map_reduce() before call pw.wait()')

        # Inside a method, `wait` resolves to the module-level function
        return wait(futures, self.executor_id, self.internal_storage,
                    throw_except=throw_except, return_when=return_when,
                    THREADPOOL_SIZE=THREADPOOL_SIZE,
                    WAIT_DUR_SEC=WAIT_DUR_SEC)

    def get_result(self, futures=None, throw_except=True,
                   timeout=wrenconfig.CF_RUNTIME_TIMEOUT,
                   THREADPOOL_SIZE=64, WAIT_DUR_SEC=2):
        """
        For getting PyWren results

        :param futures: A future or a list of futures. When empty or None,
            the futures tracked by this executor are used. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout, in seconds, for waiting results.
            Defaults to `wrenconfig.CF_RUNTIME_TIMEOUT`.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check. Default 2
        :return: The result of the future/s: a list of results, the single
            result when there is exactly one, or None when the timeout
            expired.
        :raises Exception: if there are no futures to get results from.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
          >>> pw.map(foo, data)
          >>> result = pw.get_result()
        """
        if futures:
            # Ensure futures is a list (list subclasses are kept as they are)
            ftrs = futures if isinstance(futures, list) else [futures]
        else:
            # In this case self.futures is always a list
            ftrs = self.futures

        if not ftrs:
            raise Exception('You must run pw.call_async(), pw.map()'
                            ' or pw.map_reduce() before call pw.get_result()')

        self._log_and_print(
            'Executor ID {} Getting results'.format(self.executor_id))

        # Arm the alarm that interrupts the wait after `timeout` seconds
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

        # The progress bar is shown only outside a CF cluster and when the
        # effective log level is WARNING
        if self.cf_cluster or logger.getEffectiveLevel() != logging.WARNING:
            pbar = None
        else:
            import tqdm
            print()
            pbar = tqdm.tqdm(
                bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                total=len(ftrs),
                disable=False)

        try:
            wait(ftrs, self.executor_id, self.internal_storage,
                 throw_except=throw_except,
                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                 WAIT_DUR_SEC=WAIT_DUR_SEC, pbar=pbar)
            result = [f.result() for f in ftrs if f.done and not f.futures]

        except TimeoutError:
            # Close the bar before printing so the message is not mixed
            # with the progress output
            if pbar:
                pbar.close()
                print()
            not_dones_activation_ids = {
                f.activation_id for f in ftrs if not f.done}
            self._log_and_print(
                'Executor ID {} Raised timeout of {} seconds getting results '
                '\nActivations not done: {}'.format(self.executor_id, timeout,
                                                    not_dones_activation_ids))
            self._state = ExecutorState.error
            result = None

        except KeyboardInterrupt:
            if pbar:
                pbar.close()
                print()
            not_dones_activation_ids = [
                f.activation_id for f in ftrs if not f.done]
            self._log_and_print(
                'Executor ID {} Cancelled \nActivations not done: {}'.format(
                    self.executor_id, not_dones_activation_ids))
            # No clean() here: the `finally` clause below runs before the
            # SystemExit propagates and cleans exactly once.
            sys.exit()

        finally:
            signal.alarm(0)
            if pbar:
                pbar.close()
                print()
            if self.data_cleaner and not self.cf_cluster:
                self.clean()

        msg = "Executor ID {} Finished\n".format(self.executor_id)
        logger.debug(msg)
        if (logger.getEffectiveLevel() == logging.WARNING
                and self.data_cleaner):
            print(msg)

        if result and len(result) == 1:
            return result[0]
        return result

    def create_timeline_plots(self, dst, name, run_statuses=None,
                              invoke_statuses=None):
        """
        Creates timeline and histogram of the current execution in dst.

        When neither `run_statuses` nor `invoke_statuses` is given, both are
        taken from the futures tracked by this executor.

        :param dst: destination folder to save .png plots.
        :param name: name of the file.
        :param run_statuses: run statuses timestamps.
        :param invoke_statuses: invocation statuses timestamps.
        :raises Exception: if either set of statuses is still missing.
        """
        from pywren_ibm_cloud.plots import create_timeline, create_histogram

        if self.futures and not run_statuses and not invoke_statuses:
            run_statuses = [f.run_status for f in self.futures]
            invoke_statuses = [f.invoke_status for f in self.futures]

        # The timeline receives both sets of statuses, so reject the call as
        # soon as one of them is missing
        if not run_statuses or not invoke_statuses:
            raise Exception(
                'You must provide run_statuses and invoke_statuses')

        create_timeline(dst, name, run_statuses, invoke_statuses)
        create_histogram(dst, name, run_statuses, x_lim=150)

    def clean(self, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.

        :param local_execution: When True the cleaner is launched as a
            background subprocess of this interpreter; otherwise it is
            invoked through the executor. Default True
        """
        storage_bucket = self.storage_config['storage_bucket']
        storage_prefix = self.storage_config['storage_prefix']
        # Always use '/' as separator, also where os.path.join uses '\\'
        storage_prefix = os.path.join(
            storage_prefix, self.executor_id).replace("\\", "/")

        msg = ("Executor ID {} Cleaning partial results from bucket '{}' "
               "and prefix '{}'".format(self.executor_id, storage_bucket,
                                        storage_prefix))
        logger.debug(msg)
        if logger.getEffectiveLevel() == logging.WARNING:
            print(msg)
            if not self.data_cleaner:
                print()

        if local_execution:
            # Run the cleaner in background: the main program does not wait
            # for its completion, which can take a long time.
            import subprocess

            storage_config = json.dumps(
                self.internal_storage.get_storage_config())
            # repr() yields valid Python literals, so no value can break
            # out of the generated statement, and no shell is involved.
            cleaner_code = (
                'from pywren_ibm_cloud.storage.cleaner import clean_bucket; '
                'clean_bucket({!r}, {!r}, {!r})'.format(
                    storage_bucket, storage_prefix, storage_config))
            subprocess.Popen([sys.executable, '-c', cleaner_code],
                             stdout=subprocess.DEVNULL)
        else:
            import contextlib

            extra_env = {'STORE_STATUS': False, 'STORE_RESULT': False}
            # Silence stdout during the invocation; the previous stream is
            # restored and the devnull handle closed even if the call raises
            # NOTE(review): the other methods use `self.executor.single_call`;
            # confirm that `Executor.call_async` exists.
            with open(os.devnull, 'w') as devnull, \
                    contextlib.redirect_stdout(devnull):
                self.executor.call_async(clean_os_bucket,
                                         [storage_bucket, storage_prefix],
                                         extra_env=extra_env)