Code Example #1
File: invoker.py  Project: syllogy/lithops
def function_invoker(job_payload):
    """
    Method used as a remote invoker
    """
    config = job_payload['config']
    job = SimpleNamespace(**job_payload['job'])

    env = {
        'LITHOPS_WORKER': 'True',
        'PYTHONUNBUFFERED': 'True',
        '__LITHOPS_SESSION_ID': job.job_key
    }
    os.environ.update(env)

    # Create the monitoring system
    monitoring_backend = config['lithops']['monitoring'].lower()
    monitoring_config = config.get(monitoring_backend)
    job_monitor = JobMonitor(monitoring_backend, monitoring_config)

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    serverless_config = extract_serverless_config(config)
    compute_handler = ServerlessHandler(serverless_config, storage_config)

    # Create the invoker
    invoker = FaaSRemoteInvoker(config, job.executor_id, internal_storage,
                                compute_handler, job_monitor)
    invoker.run_job(job)
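
Usage sketch (hedged): the payload below mirrors the keys function_invoker
reads ('config' plus a 'job' dict containing at least 'job_key' and
'executor_id'); all concrete values are hypothetical placeholders, not taken
from the project.

job_payload = {
    'config': {
        'lithops': {'monitoring': 'storage'},
        # ... remaining config sections produced by the Lithops config loader
    },
    'job': {
        'job_key': 'a1b2c3-0-M000',   # hypothetical job key
        'executor_id': 'a1b2c3-0',    # hypothetical executor id
        # ... remaining job fields serialized by the client-side invoker
    }
}
function_invoker(job_payload)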
Code Example #2
File: executors.py  Project: syllogy/lithops
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 monitoring=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """ Create a FunctionExecutor Class """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if this is a lithops worker, logging has already been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        monitoring_backend = self.config['lithops']['monitoring'].lower()
        monitoring_config = self.config.get(monitoring_backend)
        self.job_monitor = JobMonitor(monitoring_backend, monitoring_config)

        # Create the invoker
        self.invoker = create_invoker(self.config, self.executor_id,
                                      self.internal_storage,
                                      self.compute_handler, self.job_monitor)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

        self.log_path = None
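
A hedged construction sketch for the __init__ above: each keyword argument is
collected into config_ow and overrides the corresponding entry of the loaded
configuration. The backend name and values are placeholders, assuming a valid
Lithops config is available.

fexec = FunctionExecutor(mode='serverless',
                         backend='ibm_cf',      # placeholder backend name
                         runtime_memory=512,    # MB, overrides config value
                         workers=64,
                         monitoring='storage',
                         log_level='INFO')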
Code Example #3
File: executors.py  Project: syllogy/lithops
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic
    for the Localhost, Serverless and Standalone executors
    """
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 monitoring=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """ Create a FunctionExecutor Class """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if this is a lithops worker, logging has already been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        monitoring_backend = self.config['lithops']['monitoring'].lower()
        monitoring_config = self.config.get(monitoring_backend)
        self.job_monitor = JobMonitor(monitoring_backend, monitoring_config)

        # Create the invoker
        self.invoker = create_invoker(self.config, self.executor_id,
                                      self.internal_storage,
                                      self.compute_handler, self.job_monitor)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

        self.log_path = None

    def __enter__(self):
        """ Context manager method """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method """
        self.job_monitor.stop()
        self.invoker.stop()

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   runtime_memory=None,
                   timeout=None,
                   include_modules=[],
                   exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function,
            map_iterdata,
            chunksize=None,
            worker_processes=None,
            extra_args=None,
            extra_env=None,
            runtime_memory=None,
            chunk_size=None,
            chunk_n=None,
            obj_chunk_size=None,
            obj_chunk_number=None,
            timeout=None,
            invoke_pool_threads=None,
            include_modules=[],
            exclude_modules=[]):
        """
        For running multiple function executions asynchronously

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iterdata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param obj_chunk_size: The size of the data chunks to split each
                               object. 'None' for processing the whole file
                               in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for
                                 processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their execution
                        before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunksize=None,
                   worker_processes=None,
                   extra_args=None,
                   extra_env=None,
                   map_runtime_memory=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   reduce_runtime_memory=None,
                   chunk_size=None,
                   chunk_n=None,
                   timeout=None,
                   invoke_pool_threads=None,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed entirely within the compute backend.

        :param map_function: the function to map over the data
        :param map_iterdata:  An iterable of input data
        :param chunksize: Split map_iterdata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker Default 1
        :param reduce_function:  the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_args: Additional arguments to pass to function activation. Default None.
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing
                               the whole file in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole
                                 file in one function activation.
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(self.config,
                                 self.internal_storage,
                                 self.executor_id,
                                 map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 worker_processes=worker_processes,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 chunk_size=chunk_size,
                                 chunk_n=chunk_n,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            wait(fs=map_futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            self.config,
            self.internal_storage,
            self.executor_id,
            reduce_job_id,
            reduce_function,
            map_job,
            map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures

    def wait(self,
             fs=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             download_results=False,
             timeout=None,
             threadpool_size=THREADPOOL_SIZE,
             wait_dur_sec=WAIT_DUR_SEC):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = fs or self.futures
        if type(futures) != list:
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)

        except Exception as e:
            self.invoker.stop()
            if not fs and is_notebook():
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        finally:
            present_jobs = {f.job_key for f in futures}
            self.job_monitor.stop(present_jobs)
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures if not f.success and not f.done]

        return fs_done, fs_notdone

    def get_result(self,
                   fs=None,
                   throw_except=True,
                   timeout=None,
                   threadpool_size=THREADPOOL_SIZE,
                   wait_dur_sec=WAIT_DUR_SEC):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check.
        :return: The result of the future/s
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug("ExecutorID {} Finished getting results".format(
            self.executor_id))

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self, fs=None, dst=None):
        """
        Creates timeline and histogram plots of the current execution in dst.

        :param fs: list of futures.
        :param dst: destination path for the .png plots.
        """
        ftrs = self.futures if not fs else fs

        if type(ftrs) != list:
            ftrs = [ftrs]

        ftrs_to_plot = [
            f for f in ftrs if (f.success or f.done) and not f.error
        ]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'.format(
                self.executor_id))
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info('ExecutorID {} - Creating execution plots'.format(
            self.executor_id))

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs=None,
              cs=None,
              clean_cloudobjects=True,
              spawn_cleaner=True,
              force=False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        :param fs: list of futures to clean
        :param cs: list of cloudobjects to clean
        :param clean_cloudobjects: true/false
        :param spawn_cleaner: true/false
        """

        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        futures = fs or self.futures
        futures = [futures] if type(futures) != list else futures
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data".format(
                self.executor_id))
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

            self.compute_handler.clear()

        if (jobs_to_clean or cs) and spawn_cleaner:
            log_file = open(CLEANER_LOG_FILE, 'a')
            cmdstr = [sys.executable, '-m', 'lithops.scripts.cleaner']
            sp.Popen(' '.join(cmdstr),
                     shell=True,
                     stdout=log_file,
                     stderr=log_file)

    def job_summary(self, cloud_objects_n=0):
        """
        Logs information of the jobs executed by the calling function executor.
        Currently supported backends: code_engine, ibm_vpc and ibm_cf.

        :param cloud_objects_n: number of cloud object used in COS, declared by user.
        """
        import pandas as pd

        def init():
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file."""
            pd.DataFrame(content).to_csv(self.log_path,
                                         mode='a',
                                         header=False,
                                         index=False)

        def append_summary():
            """ add a summary row to the log file"""
            df = pd.read_csv(self.log_path)
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ',
                df.Invocations.sum(), df['Memory(MB)'].sum(),
                round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path,
                             mode='a',
                             header=False,
                             index=False)

        def get_object_num():
            """returns cloud objects used up to this point, using this function executor. """
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        # Avoid logging info unless chosen computational backend is supported.
        if hasattr(self.compute_handler.backend, 'calc_cost'):

            if self.log_path:  # retrieve cloud_objects_n from last log file
                cloud_objects_n += get_object_num()
            else:
                self.log_path = os.path.join(
                    constants.LOGS_DIR,
                    datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))
            # override current logfile
            init()

            futures = self.futures
            if type(futures) != list:
                futures = [futures]

            memory = []
            runtimes = []
            curr_job_id = futures[0].job_id
            job_func = futures[
                0].function_name  # each job is conducted on a single function

            for future in futures:
                if curr_job_id != future.job_id:
                    cost = self.compute_handler.backend.calc_cost(
                        runtimes, memory)
                    append([[
                        curr_job_id, job_func,
                        len(runtimes),
                        sum(memory),
                        np.round(np.average(runtimes), 10), cost, ' '
                    ]])

                    # updating next iteration's variables:
                    curr_job_id = future.job_id
                    job_func = future.function_name
                    memory.clear()
                    runtimes.clear()

                memory.append(future.runtime_memory)
                runtimes.append(future.stats['worker_exec_time'])

            # appends last Job-ID
            cost = self.compute_handler.backend.calc_cost(runtimes, memory)
            append([[
                curr_job_id, job_func,
                len(runtimes),
                sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])
            # append summary row to end of the dataframe
            append_summary()

        else:  # calc_cost() doesn't exist for chosen computational backend.
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(self.compute_handler.backend.name))
            return
        logger.info("View log file logs at {}".format(self.log_path))
Code Example #4
def wait(fs,
         internal_storage=None,
         throw_except=True,
         timeout=None,
         return_when=ALL_COMPLETED,
         download_results=False,
         job_monitor=None,
         threadpool_size=THREADPOOL_SIZE,
         wait_dur_sec=WAIT_DUR_SEC):
    """
    Wait for the Future instances (possibly created by different Executor instances)
    given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
    contains the futures that completed (finished or cancelled futures) before the wait
    completed. The second set, named not_done, contains the futures that did not complete
    (pending or running futures). timeout can be used to control the maximum number of
    seconds to wait before returning.

    :param fs: Futures list. Default None
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: Percentage of done futures
    :param download_results: Download results. Default false (Only get statuses)
    :param timeout: Timeout of waiting for results.
    :param threadpool_size: Number of threads to use. Default 64
    :param wait_dur_sec: Time interval between each check.

    :return: `(fs_done, fs_notdone)`
        where `fs_done` is a list of futures that have completed
        and `fs_notdone` is a list of futures that have not completed.
    :rtype: 2-tuple of list
    """
    if not fs:
        return

    if type(fs) != list and type(fs) != FuturesList:
        fs = [fs]

    if download_results:
        msg = 'ExecutorID {} - Getting results from functions'.format(
            fs[0].executor_id)
        fs_done = [f for f in fs if f.done]
        fs_not_done = [f for f in fs if not f.done]

    else:
        msg = 'ExecutorID {} - Waiting for {}% of functions to complete'.format(
            fs[0].executor_id, return_when)
        fs_done = [f for f in fs if f.success or f.done]
        fs_not_done = [f for f in fs if not (f.success or f.done)]

    logger.info(msg)

    if not fs_not_done:
        return fs_done, fs_not_done

    if is_unix_system() and timeout is not None:
        logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
        error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(
            timeout)
        signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
        signal.alarm(timeout)

    # Setup progress bar
    pbar = None
    if not is_lithops_worker() and logger.getEffectiveLevel() == logging.INFO:
        from tqdm.auto import tqdm
        if not is_notebook():
            print()
        pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                    total=len(fs),
                    disable=None)
        pbar.update(len(fs_done))

    try:
        executors_data = _create_executors_data_from_futures(
            fs, internal_storage)

        if not job_monitor:
            for executor_data in executors_data:
                job_monitor = JobMonitor(
                    executor_id=executor_data.executor_id,
                    internal_storage=executor_data.internal_storage)
                job_monitor.start(fs=executor_data.futures)

        sleep_sec = wait_dur_sec if job_monitor.backend == 'storage' else 0.3

        if return_when == ALWAYS:
            for executor_data in executors_data:
                _get_executor_data(fs,
                                   executor_data,
                                   pbar=pbar,
                                   throw_except=throw_except,
                                   download_results=download_results,
                                   threadpool_size=threadpool_size)
        else:
            while not _check_done(fs, return_when, download_results):
                for executor_data in executors_data:
                    new_data = _get_executor_data(
                        fs,
                        executor_data,
                        pbar=pbar,
                        throw_except=throw_except,
                        download_results=download_results,
                        threadpool_size=threadpool_size)
                time.sleep(0 if new_data else sleep_sec)

    except KeyboardInterrupt as e:
        if download_results:
            not_dones_call_ids = [(f.job_id, f.call_id) for f in fs
                                  if not f.done]
        else:
            not_dones_call_ids = [(f.job_id, f.call_id) for f in fs
                                  if not f.success and not f.done]
        msg = ('Cancelled - Total Activations not done: {}'.format(
            len(not_dones_call_ids)))
        if pbar:
            pbar.close()
            print()
        logger.info(msg)
        raise e

    except Exception as e:
        raise e

    finally:
        if is_unix_system():
            signal.alarm(0)
        if pbar and not pbar.disable:
            pbar.close()
            if not is_notebook():
                print()

    if download_results:
        fs_done = [f for f in fs if f.done]
        fs_notdone = [f for f in fs if not f.done]
    else:
        fs_done = [f for f in fs if f.success or f.done]
        fs_notdone = [f for f in fs if not f.success and not f.done]

    return fs_done, fs_notdone
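
A hedged call sketch for the standalone wait() above; futures and
internal_storage are assumed to come from a prior map() invocation and its
executor. Per the docstring, return_when is interpreted as a percentage of
done futures.

fs_done, fs_notdone = wait(fs=futures,
                           internal_storage=internal_storage,
                           return_when=50,        # ~50% of activations done
                           download_results=False)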
Code Example #5
    def __init__(self,
                 mode: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None,
                 backend: Optional[str] = None,
                 storage: Optional[str] = None,
                 runtime: Optional[str] = None,
                 runtime_memory: Optional[int] = None,
                 monitoring: Optional[str] = None,
                 max_workers: Optional[int] = None,
                 worker_processes: Optional[int] = None,
                 remote_invoker: Optional[bool] = None,
                 log_level: Optional[str] = False):
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # setup lithops logging
        if not self.is_lithops_worker:
            # if this is a lithops worker, logging has already been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # overwrite user-provided parameters
        config_ow = {'lithops': {}, 'backend': {}}
        if runtime is not None:
            config_ow['backend']['runtime'] = runtime
        if runtime_memory is not None:
            config_ow['backend']['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow['backend']['remote_invoker'] = remote_invoker
        if worker_processes is not None:
            config_ow['backend']['worker_processes'] = worker_processes
        if max_workers is not None:
            config_ow['backend']['max_workers'] = max_workers

        if mode is not None:
            config_ow['lithops']['mode'] = mode
        if backend is not None:
            config_ow['lithops']['backend'] = backend
        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        # Load configuration
        self.config = default_config(copy.deepcopy(config), config_ow)

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            atexit.register(self.clean,
                            clean_cloudobjects=False,
                            clean_fn=True)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.backend = self.config['lithops']['backend']
        self.mode = self.config['lithops']['mode']

        if self.mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif self.mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif self.mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      config=self.config)

        # Create the invoker
        self.invoker = create_invoker(config=self.config,
                                      executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      compute_handler=self.compute_handler,
                                      job_monitor=self.job_monitor)

        logger.debug(
            f'Function executor for {self.backend} created with ID: {self.executor_id}'
        )

        self.log_path = None
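
A hedged sketch of the newer __init__ above: user parameters now land in a
flat 'backend' section of config_ow instead of a per-mode section. All values
are placeholders.

fexec = FunctionExecutor(backend='ibm_cf',       # placeholder backend
                         runtime_memory=1024,    # MB
                         max_workers=100,
                         worker_processes=2)
# Internally this builds roughly:
# config_ow = {'lithops': {'backend': 'ibm_cf'},
#              'backend': {'runtime_memory': 1024, 'max_workers': 100,
#                          'worker_processes': 2}}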
Code Example #6
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic for the Localhost, Serverless and Standalone executors

    :param mode: Execution mode. One of: localhost, serverless or standalone
    :param config: Settings passed in here will override those in lithops_config
    :param backend: Compute backend to run the functions
    :param storage: Storage backend to store Lithops data
    :param runtime: Name of the runtime to run the functions
    :param runtime_memory: Memory (in MB) to use to run the functions
    :param monitoring: Monitoring system implementation. One of: storage, rabbitmq
    :param max_workers: Max number of parallel workers
    :param worker_processes: Worker granularity, number of concurrent/parallel processes in each worker
    :param remote_invoker: Spawn a function that will perform the actual job invocation (True/False)
    :param log_level: Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this param is set, all logging params in config are disabled
    """
    def __init__(self,
                 mode: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None,
                 backend: Optional[str] = None,
                 storage: Optional[str] = None,
                 runtime: Optional[str] = None,
                 runtime_memory: Optional[int] = None,
                 monitoring: Optional[str] = None,
                 max_workers: Optional[int] = None,
                 worker_processes: Optional[int] = None,
                 remote_invoker: Optional[bool] = None,
                 log_level: Optional[str] = False):
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # setup lithops logging
        if not self.is_lithops_worker:
            # if this is a lithops worker, logging has already been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # overwrite user-provided parameters
        config_ow = {'lithops': {}, 'backend': {}}
        if runtime is not None:
            config_ow['backend']['runtime'] = runtime
        if runtime_memory is not None:
            config_ow['backend']['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow['backend']['remote_invoker'] = remote_invoker
        if worker_processes is not None:
            config_ow['backend']['worker_processes'] = worker_processes
        if max_workers is not None:
            config_ow['backend']['max_workers'] = max_workers

        if mode is not None:
            config_ow['lithops']['mode'] = mode
        if backend is not None:
            config_ow['lithops']['backend'] = backend
        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        # Load configuration
        self.config = default_config(copy.deepcopy(config), config_ow)

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            atexit.register(self.clean,
                            clean_cloudobjects=False,
                            clean_fn=True)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.backend = self.config['lithops']['backend']
        self.mode = self.config['lithops']['mode']

        if self.mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif self.mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif self.mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      config=self.config)

        # Create the invoker
        self.invoker = create_invoker(config=self.config,
                                      executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      compute_handler=self.compute_handler,
                                      job_monitor=self.job_monitor)

        logger.debug(
            f'Function executor for {self.backend} created with ID: {self.executor_id}'
        )

        self.log_path = None

    def __enter__(self):
        """ Context manager method """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method """
        self.job_monitor.stop()
        self.invoker.stop()
        self.compute_handler.clear()

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func: Callable,
                   data: Union[List[Any], Tuple[Any, ...], Dict[str, Any]],
                   extra_env: Optional[Dict] = None,
                   runtime_memory: Optional[int] = None,
                   timeout: Optional[int] = None,
                   include_modules: Optional[List] = [],
                   exclude_modules: Optional[List] = []) -> ResponseFuture:
        """
        For running one function execution asynchronously.

        :param func: The function to map over the data.
        :param data: Input data. Arguments can be passed as a list or tuple, or as a dictionary for keyword arguments.
        :param extra_env: Additional env variables for function environment.
        :param runtime_memory: Memory to use to run the function.
        :param timeout: Time that the function has to complete its execution before raising a timeout.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: Response future.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function: Callable,
            map_iterdata: List[Union[List[Any], Tuple[Any, ...], Dict[str,
                                                                      Any]]],
            chunksize: Optional[int] = None,
            extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                       Dict[str, Any]]] = None,
            extra_env: Optional[Dict[str, str]] = None,
            runtime_memory: Optional[int] = None,
            obj_chunk_size: Optional[int] = None,
            obj_chunk_number: Optional[int] = None,
            timeout: Optional[int] = None,
            include_modules: Optional[List[str]] = [],
            exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Spawn multiple function activations based on the items of an input list.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data (e.g python list).
        :param chunksize: Split map_iterdata in chunks of this size. Lithops spawns 1 worker per resulting chunk
        :param extra_args: Additional arguments to pass to each map_function activation
        :param extra_env: Additional environment variables for function environment
        :param runtime_memory: Memory (in MB) to use to run the functions
        :param obj_chunk_size: Used for data processing. Chunk size to split each object in bytes. Must be >= 1MiB. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Used for data processing. Number of chunks to split each object. 'None' for processing the whole file in one function activation. obj_chunk_number takes precedence over obj_chunk_size if both parameters are set
        :param timeout: Max time per function activation (seconds)
        :param include_modules: Explicitly pickle these dependencies. All required dependencies are pickled by default (empty list). No dependencies are pickled if it is explicitly set to None
        :param exclude_modules: Explicitly keep these modules from pickled dependencies. It is not taken into account if you set include_modules.

        :return: A list with size `len(map_iterdata)` of futures for each job (Futures are also internally stored by Lithops).
        """

        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        return create_futures_list(futures, self)

    def map_reduce(self,
                   map_function: Callable,
                   map_iterdata: List[Union[List[Any], Tuple[Any, ...],
                                            Dict[str, Any]]],
                   reduce_function: Callable,
                   chunksize: Optional[int] = None,
                   extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                              Dict[str, Any]]] = None,
                   extra_env: Optional[Dict[str, str]] = None,
                   map_runtime_memory: Optional[int] = None,
                   reduce_runtime_memory: Optional[int] = None,
                   obj_chunk_size: Optional[int] = None,
                   obj_chunk_number: Optional[int] = None,
                   timeout: Optional[int] = None,
                   reducer_one_per_object: Optional[bool] = False,
                   spawn_reducer: Optional[int] = 20,
                   include_modules: Optional[List[str]] = [],
                   exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Map the map_function over the data and apply the reduce_function across all futures.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: The function to reduce over the futures
        :param chunksize: Split map_iterdata in chunks of this size. Lithops spawns 1 worker per resulting chunk. Default 1
        :param extra_args: Additional arguments to pass to function activation. Default None
        :param extra_env: Additional environment variables for action environment. Default None
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config)
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config)
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their execution before raising a timeout
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param spawn_reducer: Percentage of done map functions before spawning the reduce function
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(config=self.config,
                                 internal_storage=self.internal_storage,
                                 executor_id=self.executor_id,
                                 job_id=map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        # spawn_reducer is a completion percentage; ALWAYS means the reduce
        # stage is spawned immediately, without waiting for map activations
        if spawn_reducer != ALWAYS:
            self.wait(map_futures, return_when=spawn_reducer)
            logger.debug(
                f'ExecutorID {self.executor_id} | JobID {map_job_id} - '
                f'{spawn_reducer}% of map activations done. Spawning reduce stage'
            )

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            config=self.config,
            internal_storage=self.internal_storage,
            executor_id=self.executor_id,
            reduce_job_id=reduce_job_id,
            reduce_function=reduce_function,
            map_job=map_job,
            map_futures=map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return create_futures_list(map_futures + reduce_futures, self)

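    # Usage sketch (an assumption, not part of the original file): with a
    # configured FunctionExecutor `fexec`, a minimal map_reduce round trip
    # could look like this; `my_map` and `my_reduce` are hypothetical.
    #
    #   def my_map(x):
    #       return x * 2
    #
    #   def my_reduce(results):
    #       return sum(results)
    #
    #   fexec = FunctionExecutor()
    #   fexec.map_reduce(my_map, [1, 2, 3, 4], my_reduce)
    #   print(fexec.get_result())  # 20, once the reduce future resolves
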
    def wait(
        self,
        fs: Optional[Union[ResponseFuture, FuturesList,
                           List[ResponseFuture]]] = None,
        throw_except: Optional[bool] = True,
        return_when: Optional[Any] = ALL_COMPLETED,
        download_results: Optional[bool] = False,
        timeout: Optional[int] = None,
        threadpool_size: Optional[int] = THREADPOOL_SIZE,
        wait_dur_sec: Optional[int] = WAIT_DUR_SEC
    ) -> Tuple[FuturesList, FuturesList]:
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a 2-tuple of FuturesList. The first list, fs_done,
        contains the futures that completed (finished or cancelled) before the wait
        finished. The second list, fs_notdone, contains the futures that did not complete
        (pending or running). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True
        :param return_when: Percentage of done futures that unblocks the call. Default ALL_COMPLETED
        :param download_results: Download results. Default False (only fetch statuses)
        :param timeout: Maximum number of seconds to wait for results
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check

        :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures that have completed and `fs_notdone` is a list of futures that have not completed.
        """
        futures = fs or self.futures
        if not isinstance(futures, (list, FuturesList)):
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)

            if self.data_cleaner and return_when == ALL_COMPLETED:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False)

        except (KeyboardInterrupt, Exception) as e:
            self.invoker.stop()
            self.job_monitor.stop()
            if not fs and is_notebook():
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures if not f.success and not f.done]

        return create_futures_list(fs_done, self), create_futures_list(
            fs_notdone, self)

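    # Usage sketch (hedged): wait until roughly half of the futures are done
    # before continuing; `fexec` and `my_func` are assumed to exist, and
    # return_when is given as a completion percentage per the docstring above.
    #
    #   futures = fexec.map(my_func, range(10))
    #   fs_done, fs_notdone = fexec.wait(futures, return_when=50)
    #   print(len(fs_done), 'done,', len(fs_notdone), 'pending')
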
    def get_result(self,
                   fs: Optional[Union[ResponseFuture, FuturesList,
                                      List[ResponseFuture]]] = None,
                   throw_except: Optional[bool] = True,
                   timeout: Optional[int] = None,
                   threadpool_size: Optional[int] = THREADPOOL_SIZE,
                   wait_dur_sec: Optional[int] = WAIT_DUR_SEC):
        """
        Gets the results from all function activations.

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check.
        :return: The result of the future or a list of results.
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        # skip futures that spawned sub-futures or whose output was suppressed
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug(
            f'ExecutorID {self.executor_id} - Finished getting results')

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

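    # Usage sketch (hedged): a plain map/get_result round trip, assuming a
    # configured executor; lambdas are assumed to serialize, since Lithops
    # ships functions with cloudpickle.
    #
    #   fexec = FunctionExecutor()
    #   fexec.map(lambda x: x + 1, [0, 1, 2])
    #   print(fexec.get_result())  # [1, 2, 3]
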
    def plot(self,
             fs: Optional[Union[ResponseFuture, List[ResponseFuture],
                                FuturesList]] = None,
             dst: Optional[str] = None):
        """
        Creates a timeline plot and a histogram of the current execution and saves them under dst.

        :param fs: List of futures.
        :param dst: Destination path where the .png plots are saved.
        """
        ftrs = self.futures if not fs else fs

        if isinstance(ftrs, ResponseFuture):
            ftrs = [ftrs]

        ftrs_to_plot = [
            f for f in ftrs if (f.success or f.done) and not f.error
        ]

        if not ftrs_to_plot:
            logger.debug(
                f'ExecutorID {self.executor_id} - No futures ready to plot')
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info(
            f'ExecutorID {self.executor_id} - Creating execution plots')

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

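    # Usage sketch (hedged): generate the .png plots once results are in;
    # 'my_plots' is a hypothetical destination path.
    #
    #   fexec.get_result()
    #   fexec.plot(dst='my_plots')
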
    def clean(self,
              fs: Optional[Union[ResponseFuture, List[ResponseFuture]]] = None,
              cs: Optional[List[CloudObject]] = None,
              clean_cloudobjects: Optional[bool] = True,
              clean_fn: Optional[bool] = False,
              force: Optional[bool] = False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        :param fs: List of futures to clean
        :param cs: List of cloudobjects to clean
        :param clean_cloudobjects: Delete all cloudobjects created with this executor
        :param clean_fn: Delete cached functions in this executor
        :param force: Clean all future objects even if they have not been completed
        """
        global CLEANER_PROCESS

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        if clean_fn:
            data = {
                'fn_to_clean': self.executor_id,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)

        futures = fs or self.futures
        futures = [futures] if not isinstance(futures, (list, FuturesList)) else futures
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info(
                f'ExecutorID {self.executor_id} - Cleaning temporary data')
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

        # spawn a new cleaner process only if a previous one is not still running
        spawn_cleaner = not (CLEANER_PROCESS
                             and CLEANER_PROCESS.poll() is None)
        if (jobs_to_clean or cs) and spawn_cleaner:
            cmd = [sys.executable, '-m', 'lithops.scripts.cleaner']
            CLEANER_PROCESS = sp.Popen(cmd, start_new_session=True)

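    # Usage sketch (hedged): explicit cleanup at the end of a run, removing
    # temporary job data together with any cloudobjects the executor created.
    #
    #   fexec.get_result()
    #   fexec.clean(clean_cloudobjects=True)
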
    def job_summary(self, cloud_objects_n: Optional[int] = 0):
        """
        Logs information about the jobs executed by the calling function executor.
        Currently supported backends: code_engine, ibm_vpc and ibm_cf.

        :param cloud_objects_n: Number of cloud objects used in COS, declared by the user.
        """
        import pandas as pd
        import numpy as np

        def init():
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file."""
            pd.DataFrame(content).to_csv(self.log_path,
                                         mode='a',
                                         header=False,
                                         index=False)

        def append_summary():
            """ add a summary row to the log file"""
            df = pd.read_csv(self.log_path)
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ',
                df.Invocations.sum(), df['Memory(MB)'].sum(),
                round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path,
                             mode='a',
                             header=False,
                             index=False)

        def get_object_num():
            """returns cloud objects used up to this point, using this function executor. """
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        # Avoid logging info unless the chosen compute backend is supported.
        if hasattr(self.compute_handler.backend, 'calc_cost'):

            if self.log_path:  # retrieve cloud_objects_n from last log file
                cloud_objects_n += get_object_num()
            else:
                self.log_path = os.path.join(
                    constants.LOGS_DIR,
                    datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))
            # overwrite the current log file
            init()

            futures = self.futures
            if type(futures) != list:
                futures = [futures]

            memory = []
            runtimes = []
            curr_job_id = futures[0].job_id
            # each job is conducted on a single function
            job_func = futures[0].function_name

            for future in futures:
                if curr_job_id != future.job_id:
                    cost = self.compute_handler.backend.calc_cost(
                        runtimes, memory)
                    append([[
                        curr_job_id, job_func,
                        len(runtimes),
                        sum(memory),
                        np.round(np.average(runtimes), 10), cost, ' '
                    ]])

                    # updating next iteration's variables:
                    curr_job_id = future.job_id
                    job_func = future.function_name
                    memory.clear()
                    runtimes.clear()

                memory.append(future.runtime_memory)
                runtimes.append(future.stats['worker_exec_time'])

            # append the row for the last Job-ID
            cost = self.compute_handler.backend.calc_cost(runtimes, memory)
            append([[
                curr_job_id, job_func,
                len(runtimes),
                sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])
            # append summary row to end of the dataframe
            append_summary()

        else:  # calc_cost() doesn't exist for chosen computational backend.
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(self.compute_handler.backend.name))
            return
        logger.info("View log file logs at {}".format(self.log_path))