def function_invoker(job_payload):
    """
    Entry point used when a function acts as a remote invoker.

    Rebuilds, from the received payload, every object needed to launch a
    job (job monitor, internal storage, compute handler and invoker) and
    then runs the job through the invoker.

    :param job_payload: dict with a 'config' entry (the Lithops configuration)
        and a 'job' entry (a dict that is expanded into a namespace object)
    """
    config = job_payload['config']
    job = SimpleNamespace(**job_payload['job'])

    # Export the worker-related environment variables for this process
    os.environ.update({
        'LITHOPS_WORKER': 'True',
        'PYTHONUNBUFFERED': 'True',
        '__LITHOPS_SESSION_ID': job.job_key,
    })

    # Monitoring system: its config lives under a key named after the backend
    monitoring_backend = config['lithops']['monitoring'].lower()
    job_monitor = JobMonitor(monitoring_backend,
                             config.get(monitoring_backend))

    # Storage
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    # Compute handler
    compute_handler = ServerlessHandler(extract_serverless_config(config),
                                        storage_config)

    # Invoker: build it and launch the job straight away
    remote_invoker = FaaSRemoteInvoker(config, job.executor_id,
                                       internal_storage, compute_handler,
                                       job_monitor)
    remote_invoker.run_job(job)
def __init__(self, mode=None, config=None, backend=None, storage=None,
             runtime=None, runtime_memory=None, monitoring=None,
             workers=None, remote_invoker=None, log_level=False):
    """
    Create a FunctionExecutor.

    :param mode: execution mode; when given it must be one of LOCALHOST,
        SERVERLESS or STANDALONE, otherwise it is obtained from get_mode()
    :param config: user configuration; a deep copy is merged with the
        overrides below
    :param backend: overrides the 'backend' key of the mode section
    :param storage: overrides the 'storage' key of the 'lithops' section
    :param runtime: overrides the 'runtime' key of the mode section
    :param runtime_memory: overrides 'runtime_memory' (converted to int)
    :param monitoring: overrides the 'monitoring' key of the 'lithops' section
    :param workers: overrides the 'workers' key of the 'lithops' section
    :param remote_invoker: overrides 'remote_invoker' of the mode section
    :param log_level: log level to set up; False (default) falls back to
        the logging settings found in the config
    """
    valid_modes = [LOCALHOST, SERVERLESS, STANDALONE]
    if mode and mode not in valid_modes:
        raise Exception("Function executor mode must be one of '{}', '{}' "
                        "or '{}'".format(*valid_modes))

    self.is_lithops_worker = is_lithops_worker()

    # Logging is only configured here when not running inside a worker
    # (for workers it has been set up in entry_point.py)
    if not self.is_lithops_worker:
        if log_level:
            setup_lithops_logger(log_level)
        elif (log_level is False
              and logger.getEffectiveLevel() == logging.WARNING):
            # Fall back to the logging settings from the config
            setup_lithops_logger(*get_log_info(config))

    # Resolve the execution mode
    mode = mode or get_mode(backend, config)
    config_ow = {'lithops': {'mode': mode}, mode: {}}

    # Collect the user-provided overrides, skipping everything left as None
    mode_overrides = (
        ('runtime', runtime),
        ('backend', backend),
        ('runtime_memory',
         None if runtime_memory is None else int(runtime_memory)),
        ('remote_invoker', remote_invoker),
    )
    lithops_overrides = (
        ('storage', storage),
        ('workers', workers),
        ('monitoring', monitoring),
    )
    for key, value in mode_overrides:
        if value is not None:
            config_ow[mode][key] = value
    for key, value in lithops_overrides:
        if value is not None:
            config_ow['lithops'][key] = value

    self.config = default_config(copy.deepcopy(config), config_ow)
    self.executor_id = create_executor_id()

    self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
    if self.data_cleaner and not self.is_lithops_worker:
        # The cleaner process is only spawned at exit when the second
        # '-'-separated field of the executor id is 0
        executor_index = int(self.executor_id.split('-')[1])
        atexit.register(self.clean,
                        spawn_cleaner=(executor_index == 0),
                        clean_cloudobjects=False)

    self.internal_storage = InternalStorage(
        extract_storage_config(self.config))
    self.storage = self.internal_storage.storage

    self.futures = []
    self.cleaned_jobs = set()
    self.total_jobs = 0
    self.last_call = None

    # Compute handler matching the selected mode
    if mode == LOCALHOST:
        self.compute_handler = LocalhostHandler(
            extract_localhost_config(self.config))
    elif mode == SERVERLESS:
        self.compute_handler = ServerlessHandler(
            extract_serverless_config(self.config), self.internal_storage)
    elif mode == STANDALONE:
        self.compute_handler = StandaloneHandler(
            extract_standalone_config(self.config))

    # Monitoring system: its config lives under a key named after the backend
    monitoring_backend = self.config['lithops']['monitoring'].lower()
    self.job_monitor = JobMonitor(monitoring_backend,
                                  self.config.get(monitoring_backend))

    # Invoker
    self.invoker = create_invoker(self.config, self.executor_id,
                                  self.internal_storage,
                                  self.compute_handler, self.job_monitor)

    logger.info('{} Executor created with ID: {}'.format(
        mode.capitalize(), self.executor_id))

    self.log_path = None
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic
    for the Localhost, Serverless and Standalone executors
    """

    def __init__(self, mode=None, config=None, backend=None, storage=None,
                 runtime=None, runtime_memory=None, monitoring=None,
                 workers=None, remote_invoker=None, log_level=False):
        """
        Create a FunctionExecutor.

        :param mode: execution mode; when given it must be one of LOCALHOST,
            SERVERLESS or STANDALONE, otherwise it is obtained from get_mode()
        :param config: user configuration; a deep copy is merged with the
            overrides below
        :param backend: overrides the 'backend' key of the mode section
        :param storage: overrides the 'storage' key of the 'lithops' section
        :param runtime: overrides the 'runtime' key of the mode section
        :param runtime_memory: overrides 'runtime_memory' (converted to int)
        :param monitoring: overrides 'monitoring' of the 'lithops' section
        :param workers: overrides the 'workers' key of the 'lithops' section
        :param remote_invoker: overrides 'remote_invoker' of the mode section
        :param log_level: log level to set up; False (default) falls back to
            the logging settings found in the config

        :raises Exception: if `mode` is given and is not a known mode
        """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS, STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif (log_level is False
                  and logger.getEffectiveLevel() == logging.WARNING):
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            # The cleaner process is only spawned at exit when the second
            # '-'-separated field of the executor id is 0
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean, spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        monitoring_backend = self.config['lithops']['monitoring'].lower()
        monitoring_config = self.config.get(monitoring_backend)
        self.job_monitor = JobMonitor(monitoring_backend, monitoring_config)

        # Create the invoker
        self.invoker = create_invoker(self.config, self.executor_id,
                                      self.internal_storage,
                                      self.compute_handler, self.job_monitor)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

        self.log_path = None

    def __enter__(self):
        """ Context manager method: returns the executor itself """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method: stops the job monitor and the invoker """
        self.job_monitor.stop()
        self.invoker.stop()

    def _create_job_id(self, call_type):
        """
        Build the next job id and advance the job counter.

        :param call_type: prefix of the job id (e.g. 'A' or 'M')
        :return: `call_type` followed by the job counter zero-padded to
            three digits
        """
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    # NOTE(review): the `[]` defaults of include_modules/exclude_modules are
    # shared between calls. They are kept as-is because create_map_job may
    # treat an explicit None differently from an empty list -- confirm before
    # replacing them with a None default.
    def call_async(self, func, data, extra_env=None, runtime_memory=None,
                   timeout=None, include_modules=[], exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to run
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the functions have to complete their
            execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies

        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self, map_function, map_iterdata, chunksize=None,
            worker_processes=None, extra_args=None, extra_env=None,
            runtime_memory=None, chunk_size=None, chunk_n=None,
            obj_chunk_size=None, obj_chunk_number=None, timeout=None,
            invoke_pool_threads=None, include_modules=[], exclude_modules=[]):
        """
        For running multiple function executions asynchronously

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
            Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes
            in each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param chunk_size: forwarded unchanged to create_map_job
        :param chunk_n: forwarded unchanged to create_map_job
        :param obj_chunk_size: the size of the data chunks to split each
            object. 'None' for processing the whole file in one function
            activation.
        :param obj_chunk_number: Number of chunks to split each object.
            'None' for processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their
            execution before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies

        :return: the list of futures returned by the invoker for this job.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures

    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   chunksize=None, worker_processes=None, extra_args=None,
                   extra_env=None, map_runtime_memory=None,
                   obj_chunk_size=None, obj_chunk_number=None,
                   reduce_runtime_memory=None, chunk_size=None, chunk_n=None,
                   timeout=None, invoke_pool_threads=None,
                   reducer_one_per_object=False, reducer_wait_local=False,
                   include_modules=[], exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function
        across all futures.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: the function to reduce over the futures
        :param chunksize: Split map_iteradata in chunks of this size.
            Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes
            in each worker. Default 1
        :param extra_args: Additional arguments to pass to function
            activation. Default None.
        :param extra_env: Additional environment variables for action
            environment. Default None.
        :param map_runtime_memory: Memory to use to run the map function.
            Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce
            function. Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each
            object. 'None' for processing the whole file in one function
            activation.
        :param obj_chunk_number: Number of chunks to split each object.
            'None' for processing the whole file in one function activation.
        :param chunk_size: forwarded unchanged to create_map_job
        :param chunk_n: forwarded unchanged to create_map_job
        :param timeout: Time that the functions have to complete their
            execution before raising a timeout.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param reducer_one_per_object: Set one reducer per object after
            running the partitioner
        :param reducer_wait_local: Wait locally for the map futures before
            the reduce job is created
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies.

        :return: the map futures followed by the reduce futures. The map
            futures are flagged so they do not produce output in
            get_result().
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(self.config, self.internal_storage,
                                 self.executor_id, map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 worker_processes=worker_processes,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 chunk_size=chunk_size,
                                 chunk_n=chunk_n,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            wait(fs=map_futures, internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor)

        # The reduce job reuses the map job number with an 'R' prefix
        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            self.config, self.internal_storage, self.executor_id,
            reduce_job_id, reduce_function, map_job, map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        # Only the reduce futures should contribute to get_result()
        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures

    def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED,
             download_results=False, timeout=None,
             threadpool_size=THREADPOOL_SIZE, wait_dur_sec=WAIT_DUR_SEC):
        """
        Wait for the Future instances (possibly created by different Executor
        instances) given by fs to complete. timeout can be used to control
        the maximum number of seconds to wait before returning.

        :param fs: Futures list. Default None (all the futures stored in
            this executor)
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get
            statuses)
        :param timeout: Timeout of waiting for results.
        :param threadpool_size: Number of threads to use.
            Default THREADPOOL_SIZE
        :param wait_dur_sec: Time interval between each check.
            Default WAIT_DUR_SEC

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = fs or self.futures
        if not isinstance(futures, list):
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)
        except Exception:
            self.invoker.stop()
            if not fs and is_notebook():
                # Drop the futures that were being waited for from the
                # internally stored list
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise
        finally:
            present_jobs = {f.job_key for f in futures}
            self.job_monitor.stop(present_jobs)
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures
                          if not f.success and not f.done]

        return fs_done, fs_notdone

    def get_result(self, fs=None, throw_except=True, timeout=None,
                   threadpool_size=THREADPOOL_SIZE,
                   wait_dur_sec=WAIT_DUR_SEC):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None (all the futures stored in
            this executor whose result has not been read yet)
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param threadpool_size: Number of threads to use.
            Default THREADPOOL_SIZE
        :param wait_dur_sec: Time interval between each check.
            Default WAIT_DUR_SEC

        :return: The list of results, or the bare result when there is
            exactly one and the last call was not `map`
        """
        fs_done, _ = self.wait(fs=fs, throw_except=throw_except,
                               timeout=timeout, download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        # Skip futures that chain to other futures or that were flagged
        # as not producing output (e.g. the map stage of a map_reduce)
        fs_done = [f for f in fs_done
                   if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not f._read:
                # Process internally stored futures only once
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug("ExecutorID {} Finished getting results".format(
            self.executor_id))

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self, fs=None, dst=None):
        """
        Creates timeline and histogram of the current execution.

        :param fs: list of futures. Default None (all the futures stored
            in this executor)
        :param dst: destination passed to the plotting functions
        """
        ftrs = self.futures if not fs else fs

        if not isinstance(ftrs, list):
            ftrs = [ftrs]

        ftrs_to_plot = [f for f in ftrs
                        if (f.success or f.done) and not f.error]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'.format(
                self.executor_id))
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        # Imported here so plotting dependencies are only loaded on demand
        from lithops.plots import create_timeline, create_histogram

        logger.info('ExecutorID {} - Creating execution plots'.format(
            self.executor_id))

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self, fs=None, cs=None, clean_cloudobjects=True,
              spawn_cleaner=True, force=False):
        """
        Deletes all the temp files from storage. These files include the
        function, the data serialization and the function invocation results.
        It can also clean cloudobjects.

        The data to clean is pickled into files under CLEANER_DIR; the
        actual deletion is delegated to the `lithops.scripts.cleaner`
        process.

        :param fs: list of futures to clean. Default None (all the futures
            stored in this executor)
        :param cs: list of cloudobjects to clean
        :param clean_cloudobjects: true/false
        :param spawn_cleaner: true/false, whether to launch the cleaner
            process
        :param force: true/false, also clean jobs whose futures are not done
        """
        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            # delete=False: the file must outlive this process so the
            # cleaner can pick it up
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        futures = fs or self.futures
        if not isinstance(futures, list):
            futures = [futures]
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data".format(
                self.executor_id))
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

            self.compute_handler.clear()

        if (jobs_to_clean or cs) and spawn_cleaner:
            cmd = [sys.executable, '-m', 'lithops.scripts.cleaner']
            # The argument list is passed without a shell so an interpreter
            # path containing spaces is not split. The child keeps its own
            # copy of the log descriptors, so the parent handle is closed
            # right after spawning instead of being leaked.
            with open(CLEANER_LOG_FILE, 'a') as log_file:
                sp.Popen(cmd, stdout=log_file, stderr=log_file)

    def job_summary(self, cloud_objects_n=0):
        """
        Logs information of the jobs executed by this function executor
        into a CSV file under constants.LOGS_DIR.

        Only compute backends that expose a `calc_cost` method are
        supported; for any other backend a warning is logged and nothing
        is written. Nothing is written either when the executor holds no
        futures.

        :param cloud_objects_n: number of cloud objects used in COS,
            declared by the user.
        """
        import pandas as pd

        backend = self.compute_handler.backend

        # Avoid logging info unless chosen computational backend is supported.
        if not hasattr(backend, 'calc_cost'):
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(backend.name))
            return

        futures = self.futures
        if not isinstance(futures, list):
            futures = [futures]

        if not futures:
            # Checked before touching the log file: there is no first job
            # to read and the existing log must not be truncated for nothing
            logger.warning('ExecutorID {} - No futures to summarize'.format(
                self.executor_id))
            return

        def init():
            """ (re)creates the log file with only the header row. """
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file. """
            pd.DataFrame(content).to_csv(self.log_path, mode='a',
                                         header=False, index=False)

        def append_job(job_id, function_name, runtimes, memory):
            """ appends the row of one job to the log file. """
            cost = backend.calc_cost(runtimes, memory)
            append([[
                job_id, function_name, len(runtimes), sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])

        def append_summary():
            """ adds a summary row to the log file. """
            df = pd.read_csv(self.log_path)
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ', df.Invocations.sum(),
                df['Memory(MB)'].sum(), round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path, mode='a', header=False,
                             index=False)

        def get_object_num():
            """ returns the last cell of the last row of the log file, i.e.
            the cloud objects count stored by the previous summary. """
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        if self.log_path:
            # retrieve cloud_objects_n from last log file
            cloud_objects_n += get_object_num()
        else:
            self.log_path = os.path.join(
                constants.LOGS_DIR,
                datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))
        # override current logfile
        init()

        memory = []
        runtimes = []
        curr_job_id = futures[0].job_id
        # each job is conducted on a single function
        job_func = futures[0].function_name

        for future in futures:
            if curr_job_id != future.job_id:
                # A new job starts: flush the one accumulated so far
                append_job(curr_job_id, job_func, runtimes, memory)

                # updating next iteration's variables:
                curr_job_id = future.job_id
                job_func = future.function_name
                memory.clear()
                runtimes.clear()

            memory.append(future.runtime_memory)
            runtimes.append(future.stats['worker_exec_time'])

        # appends last Job-ID
        append_job(curr_job_id, job_func, runtimes, memory)

        # append summary row to end of the dataframe
        append_summary()

        logger.info("View log file logs at {}".format(self.log_path))
def wait(fs, internal_storage=None, throw_except=True, timeout=None,
         return_when=ALL_COMPLETED, download_results=False, job_monitor=None,
         threadpool_size=THREADPOOL_SIZE, wait_dur_sec=WAIT_DUR_SEC):
    """
    Wait for the Future instances (possibly created by different Executor
    instances) given by fs to complete. timeout can be used to control the
    maximum number of seconds to wait before returning.

    :param fs: Future, list of futures or FuturesList to wait for
    :param internal_storage: storage handler used to build the per-executor
        data of the futures
    :param throw_except: Re-raise exception if call raised. Default True.
    :param timeout: Timeout of waiting for results. Only enforced on unix
        systems, through SIGALRM.
    :param return_when: completion condition handed to the done-check;
        `ALWAYS` performs a single status pass and returns
    :param download_results: Download results. Default false (Only get
        statuses)
    :param job_monitor: job monitor to use. When omitted, one monitor is
        created and started for each executor found in the futures
    :param threadpool_size: Number of threads to use.
        Default THREADPOOL_SIZE
    :param wait_dur_sec: Time interval between each check when the monitor
        backend is 'storage' (0.3 seconds is used otherwise)

    :return: `(fs_done, fs_notdone)`
        where `fs_done` is a list of futures that have completed
        and `fs_notdone` is a list of futures that have not completed.
        Two empty lists are returned when `fs` is empty.
    :rtype: 2-tuple of list
    """
    if not fs:
        # Keep the documented 2-tuple contract so callers can always unpack
        return [], []

    if not isinstance(fs, (list, FuturesList)):
        fs = [fs]

    def split_by_state():
        """ Splits fs into (done, not done) for the requested wait type. """
        if download_results:
            done = [f for f in fs if f.done]
            pending = [f for f in fs if not f.done]
        else:
            done = [f for f in fs if f.success or f.done]
            pending = [f for f in fs if not (f.success or f.done)]
        return done, pending

    if download_results:
        msg = 'ExecutorID {} - Getting results from functions'.format(
            fs[0].executor_id)
    else:
        msg = 'ExecutorID {} - Waiting for {}% of functions to complete'.format(
            fs[0].executor_id, return_when)
    fs_done, fs_not_done = split_by_state()

    logger.info(msg)

    if not fs_not_done:
        return fs_done, fs_not_done

    if is_unix_system() and timeout is not None:
        logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
        error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(
            timeout)
        signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
        signal.alarm(timeout)

    # Setup progress bar
    pbar = None
    if not is_lithops_worker() and logger.getEffectiveLevel() == logging.INFO:
        from tqdm.auto import tqdm
        if not is_notebook():
            print()
        pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                    total=len(fs), disable=None)
        pbar.update(len(fs_done))

    try:
        executors_data = _create_executors_data_from_futures(
            fs, internal_storage)

        if not job_monitor:
            for executor_data in executors_data:
                job_monitor = JobMonitor(
                    executor_id=executor_data.executor_id,
                    internal_storage=executor_data.internal_storage)
                job_monitor.start(fs=executor_data.futures)

        sleep_sec = wait_dur_sec if job_monitor.backend == 'storage' else 0.3

        if return_when == ALWAYS:
            # Single pass over every executor, no polling loop
            for executor_data in executors_data:
                _get_executor_data(fs, executor_data,
                                   pbar=pbar,
                                   throw_except=throw_except,
                                   download_results=download_results,
                                   threadpool_size=threadpool_size)
        else:
            while not _check_done(fs, return_when, download_results):
                for executor_data in executors_data:
                    new_data = _get_executor_data(
                        fs, executor_data,
                        pbar=pbar,
                        throw_except=throw_except,
                        download_results=download_results,
                        threadpool_size=threadpool_size)
                # Poll again immediately while new data keeps arriving
                time.sleep(0 if new_data else sleep_sec)

    except KeyboardInterrupt:
        msg = ('Cancelled - Total Activations not done: {}'.format(
            len(split_by_state()[1])))
        if pbar:
            pbar.close()
            print()
        logger.info(msg)
        raise

    finally:
        if is_unix_system():
            # Disarm any pending timeout alarm
            signal.alarm(0)
        if pbar and not pbar.disable:
            pbar.close()
            if not is_notebook():
                print()

    return split_by_state()
def __init__(self,
             mode: Optional[str] = None,
             config: Optional[Dict[str, Any]] = None,
             backend: Optional[str] = None,
             storage: Optional[str] = None,
             runtime: Optional[str] = None,
             runtime_memory: Optional[int] = None,
             monitoring: Optional[str] = None,
             max_workers: Optional[int] = None,
             worker_processes: Optional[int] = None,
             remote_invoker: Optional[bool] = None,
             log_level: Optional[str] = False):
    """
    Create a FunctionExecutor.

    Every argument other than `config` and `log_level` that is not None
    overrides the matching key of the configuration: `mode`, `backend`,
    `storage` and `monitoring` in the 'lithops' section, the remaining ones
    in the 'backend' section.

    :param config: user configuration; a deep copy is merged with the
        overrides
    :param log_level: log level to set up; False (default) falls back to
        the logging settings found in the config
    """
    self.is_lithops_worker = is_lithops_worker()
    self.executor_id = create_executor_id()

    self.futures = []
    self.cleaned_jobs = set()
    self.total_jobs = 0
    self.last_call = None

    # Logging is only configured here when not running inside a worker
    # (for workers it has been set up in entry_point.py)
    if not self.is_lithops_worker:
        if log_level:
            setup_lithops_logger(log_level)
        elif (log_level is False
              and logger.getEffectiveLevel() == logging.WARNING):
            # Fall back to the logging settings from the config
            setup_lithops_logger(*get_log_info(config))

    # Collect the user-provided overrides, skipping everything left as None
    backend_overrides = (
        ('runtime', runtime),
        ('runtime_memory',
         None if runtime_memory is None else int(runtime_memory)),
        ('remote_invoker', remote_invoker),
        ('worker_processes', worker_processes),
        ('max_workers', max_workers),
    )
    lithops_overrides = (
        ('mode', mode),
        ('backend', backend),
        ('storage', storage),
        ('monitoring', monitoring),
    )
    config_ow = {'lithops': {}, 'backend': {}}
    for key, value in backend_overrides:
        if value is not None:
            config_ow['backend'][key] = value
    for key, value in lithops_overrides:
        if value is not None:
            config_ow['lithops'][key] = value

    # Load configuration
    self.config = default_config(copy.deepcopy(config), config_ow)

    self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
    if self.data_cleaner and not self.is_lithops_worker:
        atexit.register(self.clean, clean_cloudobjects=False, clean_fn=True)

    self.internal_storage = InternalStorage(
        extract_storage_config(self.config))
    self.storage = self.internal_storage.storage

    lithops_section = self.config['lithops']
    self.backend = lithops_section['backend']
    self.mode = lithops_section['mode']

    # Compute handler matching the configured mode
    if self.mode == LOCALHOST:
        self.compute_handler = LocalhostHandler(
            extract_localhost_config(self.config))
    elif self.mode == SERVERLESS:
        self.compute_handler = ServerlessHandler(
            extract_serverless_config(self.config), self.internal_storage)
    elif self.mode == STANDALONE:
        self.compute_handler = StandaloneHandler(
            extract_standalone_config(self.config))

    # Monitoring system
    self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                  internal_storage=self.internal_storage,
                                  config=self.config)

    # Invoker
    self.invoker = create_invoker(config=self.config,
                                  executor_id=self.executor_id,
                                  internal_storage=self.internal_storage,
                                  compute_handler=self.compute_handler,
                                  job_monitor=self.job_monitor)

    logger.debug(
        f'Function executor for {self.backend} created with ID: {self.executor_id}'
    )

    self.log_path = None
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic for the Localhost,
    Serverless and Standalone executors

    :param mode: Execution mode. One of: localhost, serverless or standalone
    :param config: Settings passed in here will override those in lithops_config
    :param backend: Compute backend to run the functions
    :param storage: Storage backend to store Lithops data
    :param runtime: Name of the runtime to run the functions
    :param runtime_memory: Memory (in MB) to use to run the functions
    :param monitoring: Monitoring system implementation. One of: storage, rabbitmq
    :param max_workers: Max number of parallel workers
    :param worker_processes: Worker granularity, number of concurrent/parallel
        processes in each worker
    :param remote_invoker: Spawn a function that will perform the actual job
        invocation (True/False)
    :param log_level: Log level printing (INFO, DEBUG, ...). Set it to None to
        hide all logs. If this param is set, all logging params in config are
        disabled. The default (False) loads the logging setup from config
    """

    def __init__(self,
                 mode: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None,
                 backend: Optional[str] = None,
                 storage: Optional[str] = None,
                 runtime: Optional[str] = None,
                 runtime_memory: Optional[int] = None,
                 monitoring: Optional[str] = None,
                 max_workers: Optional[int] = None,
                 worker_processes: Optional[int] = None,
                 remote_invoker: Optional[bool] = None,
                 log_level: Optional[str] = False):
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and \
                    logger.getEffectiveLevel() == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # overwrite user-provided parameters
        config_ow = {'lithops': {}, 'backend': {}}
        if runtime is not None:
            config_ow['backend']['runtime'] = runtime
        if runtime_memory is not None:
            config_ow['backend']['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow['backend']['remote_invoker'] = remote_invoker
        if worker_processes is not None:
            config_ow['backend']['worker_processes'] = worker_processes
        if max_workers is not None:
            config_ow['backend']['max_workers'] = max_workers

        if mode is not None:
            config_ow['lithops']['mode'] = mode
        if backend is not None:
            config_ow['lithops']['backend'] = backend
        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        # Load configuration (on a copy, so the caller's dict is not modified)
        self.config = default_config(copy.deepcopy(config), config_ow)

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            # Clean temporary data and cached functions at interpreter exit
            atexit.register(self.clean, clean_cloudobjects=False,
                            clean_fn=True)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.backend = self.config['lithops']['backend']
        self.mode = self.config['lithops']['mode']

        if self.mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif self.mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif self.mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      config=self.config)

        # Create the invoker
        self.invoker = create_invoker(config=self.config,
                                      executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      compute_handler=self.compute_handler,
                                      job_monitor=self.job_monitor)

        logger.debug(
            f'Function executor for {self.backend} created with ID: {self.executor_id}'
        )

        # Path of the CSV file written by job_summary(); set on first use
        self.log_path = None

    def __enter__(self):
        """ Context manager method """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method: stops the monitor and the invoker, and
        clears the compute handler """
        self.job_monitor.stop()
        self.invoker.stop()
        self.compute_handler.clear()

    def _create_job_id(self, call_type):
        """
        Build the next job id: the call type prefix followed by the job
        counter zero-padded to 3 digits (e.g. 'M000'). Increments the counter.
        """
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return f'{call_type}{job_id}'

    def call_async(self,
                   func: Callable,
                   data: Union[List[Any], Tuple[Any, ...], Dict[str, Any]],
                   extra_env: Optional[Dict] = None,
                   runtime_memory: Optional[int] = None,
                   timeout: Optional[int] = None,
                   include_modules: Optional[List] = [],
                   exclude_modules: Optional[List] = []) -> ResponseFuture:
        """
        For running one function execution asynchronously.

        :param func: The function to map over the data.
        :param data: Input data. Arguments can be passed as a list or tuple,
            or as a dictionary for keyword arguments.
        :param extra_env: Additional env variables for function environment.
        :param runtime_memory: Memory to use to run the function.
        :param timeout: Time that the function has to complete its execution
            before raising a timeout.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies.

        :return: Response future.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        # A single call is a map job over a one-element iterdata
        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function: Callable,
            map_iterdata: List[Union[List[Any], Tuple[Any, ...],
                                     Dict[str, Any]]],
            chunksize: Optional[int] = None,
            extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                       Dict[str, Any]]] = None,
            extra_env: Optional[Dict[str, str]] = None,
            runtime_memory: Optional[int] = None,
            obj_chunk_size: Optional[int] = None,
            obj_chunk_number: Optional[int] = None,
            timeout: Optional[int] = None,
            include_modules: Optional[List[str]] = [],
            exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Spawn multiple function activations based on the items of an input list.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data (e.g python list).
        :param chunksize: Split map_iteradata in chunks of this size.
            Lithops spawns 1 worker per resulting chunk
        :param extra_args: Additional arguments to pass to each map_function
            activation
        :param extra_env: Additional environment variables for function
            environment
        :param runtime_memory: Memory (in MB) to use to run the functions
        :param obj_chunk_size: Used for data processing. Chunk size to split
            each object in bytes. Must be >= 1MiB. 'None' for processing the
            whole file in one function activation
        :param obj_chunk_number: Used for data processing. Number of chunks to
            split each object. 'None' for processing the whole file in one
            function activation. chunk_n has prevalence over chunk_size if
            both parameters are set
        :param timeout: Max time per function activation (seconds)
        :param include_modules: Explicitly pickle these dependencies. All
            required dependencies are pickled if default empty list. No one
            dependency is pickled if it is explicitly set to None
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies. It is not taken into account if you set
            include_modules.

        :return: A list with size `len(map_iterdata)` of futures for each job
            (Futures are also internally stored by Lithops).
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        # Futures used as input of this map must not produce output
        # in get_result()
        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        return create_futures_list(futures, self)

    def map_reduce(self,
                   map_function: Callable,
                   map_iterdata: List[Union[List[Any], Tuple[Any, ...],
                                            Dict[str, Any]]],
                   reduce_function: Callable,
                   chunksize: Optional[int] = None,
                   extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                              Dict[str, Any]]] = None,
                   extra_env: Optional[Dict[str, str]] = None,
                   map_runtime_memory: Optional[int] = None,
                   reduce_runtime_memory: Optional[int] = None,
                   obj_chunk_size: Optional[int] = None,
                   obj_chunk_number: Optional[int] = None,
                   timeout: Optional[int] = None,
                   reducer_one_per_object: Optional[bool] = False,
                   spawn_reducer: Optional[int] = 20,
                   include_modules: Optional[List[str]] = [],
                   exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Map the map_function over the data and apply the reduce_function
        across all futures.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: The function to reduce over the futures
        :param chunksize: Split map_iteradata in chunks of this size. Lithops
            spawns 1 worker per resulting chunk. Default 1
        :param extra_args: Additional arguments to pass to function
            activation. Default None
        :param extra_env: Additional environment variables for action
            environment. Default None
        :param map_runtime_memory: Memory to use to run the map function.
            Default None (loaded from config)
        :param reduce_runtime_memory: Memory to use to run the reduce
            function. Default None (loaded from config)
        :param obj_chunk_size: the size of the data chunks to split each
            object. 'None' for processing the whole file in one function
            activation
        :param obj_chunk_number: Number of chunks to split each object. 'None'
            for processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their
            execution before raising a timeout
        :param reducer_one_per_object: Set one reducer per object after
            running the partitioner
        :param spawn_reducer: Percentage of done map functions before spawning
            the reduce function
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled
            dependencies.

        :return: A list of futures containing the map futures followed by the
            reduce futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(config=self.config,
                                 internal_storage=self.internal_storage,
                                 executor_id=self.executor_id,
                                 job_id=map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        # Unless the reducer must always be spawned right away, block until
        # the requested percentage of map activations is done
        if spawn_reducer != ALWAYS:
            self.wait(map_futures, return_when=spawn_reducer)
            logger.debug(
                f'ExecutorID {self.executor_id} | JobID {map_job_id} - '
                f'{spawn_reducer}% of map activations done. Spawning reduce stage'
            )

        # The reduce job shares the numeric id of its map job
        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            config=self.config,
            internal_storage=self.internal_storage,
            executor_id=self.executor_id,
            reduce_job_id=reduce_job_id,
            reduce_function=reduce_function,
            map_job=map_job,
            map_futures=map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        # Only the reduce futures produce output in get_result()
        for f in map_futures:
            f._produce_output = False

        return create_futures_list(map_futures + reduce_futures, self)

    def wait(
        self,
        fs: Optional[Union[ResponseFuture, FuturesList,
                           List[ResponseFuture]]] = None,
        throw_except: Optional[bool] = True,
        return_when: Optional[Any] = ALL_COMPLETED,
        download_results: Optional[bool] = False,
        timeout: Optional[int] = None,
        threadpool_size: Optional[int] = THREADPOOL_SIZE,
        wait_dur_sec: Optional[int] = WAIT_DUR_SEC
    ) -> Tuple[FuturesList, FuturesList]:
        """
        Wait for the Future instances (possibly created by different Executor
        instances) given by fs to complete. Returns a 2-tuple of futures
        lists. The first one, done, contains the futures that completed
        before the wait completed. The second one, not_done, contains the
        futures that did not complete. timeout can be used to control the
        maximum number of seconds to wait before returning.

        On any exception (including KeyboardInterrupt) the invoker and the
        job monitor are stopped, temporary data is cleaned if the data
        cleaner is enabled, and the exception is re-raised.

        :param fs: Futures list. Default None (all futures of this executor)
        :param throw_except: Re-raise exception if call raised. Default True
        :param return_when: Percentage of done futures
        :param download_results: Download results. Default false (Only get
            statuses)
        :param timeout: Timeout of waiting for results
        :param threadpool_size: Number of threads to use.
            Default THREADPOOL_SIZE
        :param wait_dur_sec: Time interval between each check

        :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
            that have completed and `fs_notdone` is a list of futures that
            have not completed.
        """
        futures = fs or self.futures
        # Accept a single future as well as a list of them
        if not isinstance(futures, (list, FuturesList)):
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)

            if self.data_cleaner and return_when == ALL_COMPLETED:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False)

        except (KeyboardInterrupt, Exception):
            self.invoker.stop()
            self.job_monitor.stop()
            if not fs and is_notebook():
                # Drop the waited futures from the internal list
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False, force=True)
            raise

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            # Without downloading results, a successful status is enough
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures if not f.success and not f.done]

        return (create_futures_list(fs_done, self),
                create_futures_list(fs_notdone, self))

    def get_result(self,
                   fs: Optional[Union[ResponseFuture, FuturesList,
                                      List[ResponseFuture]]] = None,
                   throw_except: Optional[bool] = True,
                   timeout: Optional[int] = None,
                   threadpool_size: Optional[int] = THREADPOOL_SIZE,
                   wait_dur_sec: Optional[int] = WAIT_DUR_SEC):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None (all futures of this executor
            whose result has not been read yet)
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param threadpool_size: Number of threads to use.
            Default THREADPOOL_SIZE
        :param wait_dur_sec: Time interval between each check.

        :return: The result of the future/s. A single result is returned
            bare (not inside a list) unless the last call was `map`
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        # Skip futures that spawned other futures or were marked as
        # intermediate (e.g. the map stage of a map_reduce)
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug(
            f'ExecutorID {self.executor_id} - Finished getting results')

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self,
             fs: Optional[Union[ResponseFuture, List[ResponseFuture],
                                FuturesList]] = None,
             dst: Optional[str] = None):
        """
        Creates timeline and histogram of the current execution in dst_dir.

        :param fs: list of futures.
        :param dst: destination path to save .png plots.
        """
        ftrs = self.futures if not fs else fs

        if isinstance(ftrs, ResponseFuture):
            ftrs = [ftrs]

        # Only finished futures without errors can be plotted
        ftrs_to_plot = [
            f for f in ftrs if (f.success or f.done) and not f.error
        ]

        if not ftrs_to_plot:
            logger.debug(
                f'ExecutorID {self.executor_id} - No futures ready to plot')
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info(
            f'ExecutorID {self.executor_id} - Creating execution plots')

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs: Optional[Union[ResponseFuture, List[ResponseFuture]]] = None,
              cs: Optional[List[CloudObject]] = None,
              clean_cloudobjects: Optional[bool] = True,
              clean_fn: Optional[bool] = False,
              force: Optional[bool] = False):
        """
        Deletes all the temp files from storage. These files include the
        function, the data serialization and the function invocation results.
        It can also clean cloudobjects.

        Cleaning requests are pickled into files inside CLEANER_DIR, and a
        separate `lithops.scripts.cleaner` process is spawned when none
        started by this module is still running.

        When `cs` is given without `fs`, only those cloudobjects are
        scheduled for cleaning; job data and cached functions are left alone.

        :param fs: List of futures to clean
        :param cs: List of cloudobjects to clean
        :param clean_cloudobjects: Delete all cloudobjects created with this
            executor
        :param clean_fn: Delete cached functions in this executor
        :param force: Clean all future objects even if they have not been
            completed
        """
        global CLEANER_PROCESS

        def save_data_to_clean(data):
            # delete=False: the file must outlive this process so that the
            # cleaner process can pick it up
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)

        jobs_to_clean = set()
        only_cloudobjects = bool(cs) and not fs

        if not only_cloudobjects:
            if clean_fn:
                data = {
                    'fn_to_clean': self.executor_id,
                    'storage_config':
                    self.internal_storage.get_storage_config()
                }
                save_data_to_clean(data)

            futures = fs or self.futures
            # Accept a single future, a plain list or a FuturesList
            if not isinstance(futures, (list, FuturesList)):
                futures = [futures]

            present_jobs = {
                create_job_key(f.executor_id, f.job_id)
                for f in futures
                if (f.executor_id.count('-') == 1 and f.done) or force
            }
            jobs_to_clean = present_jobs - self.cleaned_jobs

            if jobs_to_clean:
                logger.info(
                    f'ExecutorID {self.executor_id} - Cleaning temporary data')
                data = {
                    'jobs_to_clean': jobs_to_clean,
                    'clean_cloudobjects': clean_cloudobjects,
                    'storage_config':
                    self.internal_storage.get_storage_config()
                }
                save_data_to_clean(data)
                self.cleaned_jobs.update(jobs_to_clean)

        # poll() returns None while the previously spawned cleaner is alive
        spawn_cleaner = not (CLEANER_PROCESS
                             and CLEANER_PROCESS.poll() is None)
        if (jobs_to_clean or cs) and spawn_cleaner:
            cmd = [sys.executable, '-m', 'lithops.scripts.cleaner']
            CLEANER_PROCESS = sp.Popen(cmd, start_new_session=True)

    def job_summary(self, cloud_objects_n: Optional[int] = 0):
        """
        Logs information of a job executed by the calling function executor.
        Only compute backends that implement `calc_cost` are supported.

        One CSV row is written per job of this executor, followed by a
        summary row. The file is rewritten on every call.

        :param cloud_objects_n: number of cloud object used in COS, declared
            by user.
        """
        import pandas as pd
        import numpy as np

        def init():
            """ (re)creates the log file containing only the header row."""
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file."""
            pd.DataFrame(content).to_csv(self.log_path,
                                         mode='a',
                                         header=False,
                                         index=False)

        def append_summary():
            """ add a summary row to the log file"""
            df = pd.read_csv(self.log_path)
            # Average runtime weighted by the invocations of each job
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ',
                df.Invocations.sum(), df['Memory(MB)'].sum(),
                round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path,
                             mode='a',
                             header=False,
                             index=False)

        def get_object_num():
            """returns cloud objects used up to this point, using this
            function executor (last cell of the last row of the log file)."""
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        backend = self.compute_handler.backend

        # Avoid logging info unless chosen computational backend is supported.
        if not hasattr(backend, 'calc_cost'):
            # calc_cost() doesn't exist for chosen computational backend.
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(backend.name))
            return

        futures = self.futures
        if not isinstance(futures, (list, FuturesList)):
            futures = [futures]

        if not futures:
            # Nothing was executed yet: leave any existing log file untouched
            logger.warning(
                f'ExecutorID {self.executor_id} - No jobs to summarize')
            return

        if self.log_path:
            # retrieve cloud_objects_n from last log file
            cloud_objects_n += get_object_num()
        else:
            self.log_path = os.path.join(
                constants.LOGS_DIR,
                datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))

        # override current logfile
        init()

        memory = []
        runtimes = []

        def append_job(job_id, function_name):
            """ writes the row of the job accumulated in memory/runtimes."""
            cost = backend.calc_cost(runtimes, memory)
            append([[
                job_id, function_name,
                len(runtimes),
                sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])

        curr_job_id = futures[0].job_id
        # each job is conducted on a single function
        job_func = futures[0].function_name

        for future in futures:
            if curr_job_id != future.job_id:
                append_job(curr_job_id, job_func)

                # updating next iteration's variables:
                curr_job_id = future.job_id
                job_func = future.function_name
                memory.clear()
                runtimes.clear()

            memory.append(future.runtime_memory)
            runtimes.append(future.stats['worker_exec_time'])

        # appends last Job-ID
        append_job(curr_job_id, job_func)

        # append summary row to end of the dataframe
        append_summary()

        logger.info("View log file logs at {}".format(self.log_path))