Ejemplo n.º 1
0
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 rabbitmq_monitor=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """
        Create a FunctionExecutor instance.

        :param mode: Execution mode; one of LOCALHOST, SERVERLESS or
                     STANDALONE. When falsy it is resolved through
                     get_mode(backend, config).
        :param config: Configuration dictionary. A deep copy is passed to
                       default_config() together with the overrides below.
        :param backend: Overrides config[mode]['backend'] when not None.
        :param storage: Overrides config['lithops']['storage'] when not None.
        :param runtime: Overrides config[mode]['runtime'] when not None.
        :param runtime_memory: Overrides config[mode]['runtime_memory'] when
                               not None (converted with int()).
        :param rabbitmq_monitor: Overrides config['lithops']['rabbitmq_monitor']
                                 when not None.
        :param workers: Overrides config['lithops']['workers'] when not None.
        :param remote_invoker: Overrides config[mode]['remote_invoker'] when
                               not None.
        :param log_level: A truthy value is handed to setup_lithops_logger().
                          The default False means "use the logging settings
                          from config", applied only while the logger's
                          effective level is still WARNING. None leaves
                          logging untouched and disables the progress bar.

        :raises Exception: if an unsupported mode is given, or if RabbitMQ
                           monitoring is enabled but the 'rabbitmq' section of
                           the configuration has no 'amqp_url'.
        """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # The progress bar is only enabled on the client side, when logging
        # was not explicitly disabled (log_level=None) and the level is INFO
        self.setup_progressbar = (not self.is_lithops_worker
                                  and log_level is not None and
                                  logger.getEffectiveLevel() == logging.INFO)

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            # spawn_cleaner is True only when the second '-'-separated field
            # of the executor id is 0
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_monitor since "
                                "'amqp_url' is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        # Bookkeeping of the jobs launched through this executor
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # Build the compute handler and the invoker that match the mode
        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            # localhost mode reuses the StandaloneInvoker
            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)

            if self.config[mode].get('customized_runtime'):
                self.invoker = CustomizedRuntimeInvoker(
                    self.config, self.executor_id, self.internal_storage,
                    self.compute_handler)
            else:
                self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                                 self.internal_storage,
                                                 self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))
Ejemplo n.º 2
0
    def __init__(self,
                 type=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 rabbitmq_monitor=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=None):
        """
        Create a FunctionExecutor instance.

        :param type: Executor type: 'localhost', 'serverless' or 'standalone'.
                     When None it is read from config['lithops']['executor']
                     after the configuration has gone through
                     default_config().
        :param config: Configuration dictionary. A deep copy is passed to
                       default_config() together with the overrides below.
        :param backend: Overrides config[type]['backend'] when not None.
        :param storage: Overrides config['lithops']['storage'] when not None.
        :param runtime: Overrides config[type]['runtime'] when not None.
        :param runtime_memory: Overrides config[type]['runtime_memory'] when
                               not None (converted with int()).
        :param rabbitmq_monitor: Overrides config['lithops']['rabbitmq_monitor']
                                 when not None.
        :param workers: Overrides config['lithops']['workers'] when not None.
        :param remote_invoker: Overrides config[type]['remote_invoker'] when
                               not None.
        :param log_level: When truthy, handed to default_logging_config().

        :raises Exception: if RabbitMQ monitoring is enabled but the 'rabbitmq'
                           section of the configuration has no 'amqp_url', or
                           if the executor type is not supported.
        """
        if type is None:
            # Resolve the executor type from the (defaulted) configuration
            config = default_config(copy.deepcopy(config))
            type = config['lithops']['executor']

        if log_level:
            default_logging_config(log_level)

        config_ow = {'lithops': {'executor': type}, type: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[type]['runtime'] = runtime
        if backend is not None:
            config_ow[type]['backend'] = backend
        if runtime_memory is not None:
            config_ow[type]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[type]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        # Logging counts as active whenever the level differs from WARNING
        self.log_active = logger.getEffectiveLevel() != logging.WARNING
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        self.rabbitmq_monitor = self.config['lithops'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_monitor since "
                                "'amqp_url' is not present in configuration")

        self.storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(self.storage_config)
        self.storage = self.internal_storage.storage

        # Bookkeeping of the jobs launched through this executor
        self.futures = []
        self.total_jobs = 0
        self.cleaned_jobs = set()
        self.last_call = None

        # Build the compute handler and the invoker that match the type
        if type == 'localhost':
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            # localhost executors reuse the StandaloneInvoker
            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif type == 'serverless':
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.storage_config)

            self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif type == 'standalone':
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        else:
            raise Exception("Function executor type must be one of "
                            "'localhost', 'serverless' or 'standalone'")

        logger.info('{} Executor created with ID: {}'.format(
            type.capitalize(), self.executor_id))
Ejemplo n.º 3
0
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic
    for the Localhost, Serverless and Standalone executors
    """
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 rabbitmq_monitor=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """
        Create a FunctionExecutor instance.

        :param mode: Execution mode; one of LOCALHOST, SERVERLESS or
                     STANDALONE. When falsy it is resolved through
                     get_mode(backend, config).
        :param config: Configuration dictionary. A deep copy is passed to
                       default_config() together with the overrides below.
        :param backend: Overrides config[mode]['backend'] when not None.
        :param storage: Overrides config['lithops']['storage'] when not None.
        :param runtime: Overrides config[mode]['runtime'] when not None.
        :param runtime_memory: Overrides config[mode]['runtime_memory'] when
                               not None (converted with int()).
        :param rabbitmq_monitor: Overrides config['lithops']['rabbitmq_monitor']
                                 when not None.
        :param workers: Overrides config['lithops']['workers'] when not None.
        :param remote_invoker: Overrides config[mode]['remote_invoker'] when
                               not None.
        :param log_level: A truthy value is handed to setup_lithops_logger().
                          The default False means "use the logging settings
                          from config", applied only while the logger's
                          effective level is still WARNING. None leaves
                          logging untouched and disables the progress bar.

        :raises Exception: if an unsupported mode is given, or if RabbitMQ
                           monitoring is enabled but the 'rabbitmq' section of
                           the configuration has no 'amqp_url'.
        """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # The progress bar is only enabled on the client side, when logging
        # was not explicitly disabled (log_level=None) and the level is INFO
        self.setup_progressbar = (not self.is_lithops_worker
                                  and log_level is not None and
                                  logger.getEffectiveLevel() == logging.INFO)

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            # spawn_cleaner is True only when the second '-'-separated field
            # of the executor id is 0
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_monitor since "
                                "'amqp_url' is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        # Bookkeeping of the jobs launched through this executor
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # Build the compute handler and the invoker that match the mode
        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            # localhost mode reuses the StandaloneInvoker
            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)

            if self.config[mode].get('customized_runtime'):
                self.invoker = CustomizedRuntimeInvoker(
                    self.config, self.executor_id, self.internal_storage,
                    self.compute_handler)
            else:
                self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                                 self.internal_storage,
                                                 self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

    def __enter__(self):
        return self

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   runtime_memory=None,
                   timeout=None,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Run a single function execution asynchronously.

        The call is submitted as a map job whose iterdata is the one-element
        list ``[data]``; the futures returned by the invoker are appended to
        ``self.futures``.

        :param func: the function to execute
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the function has to complete its
                        execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: the first future returned by the invoker for this job.
        """
        # NOTE: the list defaults above are only forwarded, never mutated here
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        selected_runtime = self.invoker.select_runtime(job_id, runtime_memory)

        job_options = dict(
            map_function=func,
            iterdata=[data],
            runtime_meta=selected_runtime,
            runtime_memory=runtime_memory,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules,
            execution_timeout=timeout,
        )
        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id, **job_options)

        submitted = self.invoker.run(job)
        self.futures.extend(submitted)

        return submitted[0]

    def map(self,
            map_function,
            map_iterdata,
            chunksize=None,
            worker_processes=None,
            extra_args=None,
            extra_env=None,
            runtime_memory=None,
            chunk_size=None,
            chunk_n=None,
            obj_chunk_size=None,
            obj_chunk_number=None,
            timeout=None,
            invoke_pool_threads=None,
            include_modules=[],
            exclude_modules=[]):
        """
        Run multiple function executions asynchronously.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iterdata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in
                                 each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param chunk_size: Forwarded unchanged to create_map_job
                           (NOTE(review): looks like an older spelling of
                           obj_chunk_size - confirm)
        :param chunk_n: Forwarded unchanged to create_map_job
                        (NOTE(review): looks like an older spelling of
                        obj_chunk_number - confirm)
        :param obj_chunk_size: the size of the data chunks to split each
                               object. 'None' for processing the whole file
                               in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None'
                                 for processing the whole file in one function
                                 activation
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: The list of futures returned by the invoker for this job.
        """
        # NOTE: the list defaults above are only forwarded, never mutated here
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        selected_runtime = self.invoker.select_runtime(job_id, runtime_memory)

        job_options = dict(
            map_function=map_function,
            iterdata=map_iterdata,
            chunksize=chunksize,
            worker_processes=worker_processes,
            runtime_meta=selected_runtime,
            runtime_memory=runtime_memory,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules,
            execution_timeout=timeout,
            extra_args=extra_args,
            chunk_size=chunk_size,
            chunk_n=chunk_n,
            obj_chunk_size=obj_chunk_size,
            obj_chunk_number=obj_chunk_number,
            invoke_pool_threads=invoke_pool_threads,
        )
        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id, **job_options)

        submitted = self.invoker.run(job)
        self.futures.extend(submitted)

        return submitted

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunksize=None,
                   worker_processes=None,
                   extra_args=None,
                   extra_env=None,
                   map_runtime_memory=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   reduce_runtime_memory=None,
                   chunk_size=None,
                   chunk_n=None,
                   timeout=None,
                   invoke_pool_threads=None,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function
        across all the resulting futures.

        A map job is launched first; a reduce job, whose id is the map job id
        with 'M' replaced by 'R', is then launched on top of the map futures.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: the function to reduce over the futures
        :param chunksize: Split map_iterdata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in
                                 each worker. Default 1
        :param extra_args: Additional arguments to pass to function
                           activation. Default None.
        :param extra_env: Additional environment variables for action
                          environment. Default None.
        :param map_runtime_memory: Memory to use to run the map function.
                                   Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each
                               object. 'None' for processing the whole file
                               in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None'
                                 for processing the whole file in one function
                                 activation.
        :param reduce_runtime_memory: Memory to use to run the reduce
                                      function. Default None (loaded from
                                      config).
        :param chunk_size: Forwarded unchanged to create_map_job
        :param chunk_n: Forwarded unchanged to create_map_job
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout. Forwarded to the
                        map job only.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param reducer_one_per_object: Set one reducer per object after
                                       running the partitioner
        :param reducer_wait_local: Wait locally for the map futures before
                                   the reduce job is created
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies.

        :return: The map futures followed by the reduce futures, as a single
                 list. The map futures are flagged not to produce output.
        """
        # NOTE: the list defaults above are only forwarded, never mutated here
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        map_runtime = self.invoker.select_runtime(map_job_id,
                                                  map_runtime_memory)

        map_options = dict(
            map_function=map_function,
            iterdata=map_iterdata,
            chunksize=chunksize,
            worker_processes=worker_processes,
            runtime_meta=map_runtime,
            runtime_memory=map_runtime_memory,
            extra_args=extra_args,
            extra_env=extra_env,
            chunk_size=chunk_size,
            chunk_n=chunk_n,
            obj_chunk_size=obj_chunk_size,
            obj_chunk_number=obj_chunk_number,
            include_modules=include_modules,
            exclude_modules=exclude_modules,
            execution_timeout=timeout,
            invoke_pool_threads=invoke_pool_threads,
        )
        map_job = create_map_job(self.config, self.internal_storage,
                                 self.executor_id, map_job_id, **map_options)

        map_futures = self.invoker.run(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            self.wait(fs=map_futures)

        # The reduce job shares the numeric part of the map job id
        reduce_job_id = map_job_id.replace('M', 'R')

        reduce_runtime = self.invoker.select_runtime(reduce_job_id,
                                                     reduce_runtime_memory)

        reduce_options = dict(
            runtime_meta=reduce_runtime,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules,
        )
        reduce_job = create_reduce_job(self.config, self.internal_storage,
                                       self.executor_id, reduce_job_id,
                                       reduce_function, map_job, map_futures,
                                       **reduce_options)

        reduce_futures = self.invoker.run(reduce_job)
        self.futures.extend(reduce_futures)

        # get_result() skips futures whose _produce_output flag is False
        for map_future in map_futures:
            map_future._produce_output = False

        return map_futures + reduce_futures

    def wait(self,
             fs=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             download_results=False,
             timeout=None,
             THREADPOOL_SIZE=128,
             WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor
        instances) given by fs to complete, and split them into the ones that
        are done and the ones that are not.

        When download_results is True a future counts as done only if its
        `done` flag is set; otherwise `ready` or `done` is enough.

        :param fs: Futures list (or a single future). Default None, meaning
                   the futures tracked by this executor (self.futures).
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Maximum number of seconds to wait. It is enforced
                        through SIGALRM, so it only takes effect when
                        is_unix_system() is true.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check. Only used when
                             monitoring through storage.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list

        :raises Exception: if there are no futures to wait for. Any exception
                           (including KeyboardInterrupt) raised while waiting
                           is re-raised after the cleanup has run.
        """
        futures = fs or self.futures
        # A single future is accepted too: wrap it in a list
        if type(futures) != list:
            futures = [futures]

        if not futures:
            raise Exception(
                'You must run the call_async(), map() or map_reduce(), or provide'
                ' a list of futures before calling the wait()/get_result() method'
            )

        # Classify the futures according to the completion criterion in use
        if download_results:
            msg = 'ExecutorID {} - Getting results'.format(self.executor_id)
            fs_done = [f for f in futures if f.done]
            fs_not_done = [f for f in futures if not f.done]
            fs_not_ready = [f for f in futures if not f.ready]

        else:
            msg = 'ExecutorID {} - Waiting for functions to complete'.format(
                self.executor_id)
            fs_done = [f for f in futures if f.ready or f.done]
            fs_not_done = [f for f in futures if not f.ready and not f.done]
            fs_not_ready = [f for f in futures if not f.ready]

        # Nothing pending: return immediately, without logging or monitoring
        if not fs_not_done:
            return fs_done, fs_not_done

        logger.info(msg)

        if is_unix_system() and timeout is not None:
            logger.debug(
                'Setting waiting timeout to {} seconds'.format(timeout))
            error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(
                timeout)
            # timeout_handler is invoked with error_msg when the alarm fires
            # (presumably it raises to abort the wait - confirm in its definition)
            signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
            signal.alarm(timeout)

        pbar = None
        error = False

        # Progress bar: client side only, and only if something is not ready
        if not self.is_lithops_worker and self.setup_progressbar and fs_not_ready:
            from tqdm.auto import tqdm

            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                            total=len(fs_not_done))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                            total=len(fs_not_done),
                            disable=None)

        try:
            if self.rabbitmq_monitor:
                logger.debug('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures,
                              self.internal_storage,
                              rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results,
                              throw_except=throw_except,
                              pbar=pbar,
                              return_when=return_when,
                              THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures,
                             self.internal_storage,
                             download_results=download_results,
                             throw_except=throw_except,
                             return_when=return_when,
                             pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE,
                             WAIT_DUR_SEC=WAIT_DUR_SEC)

        except KeyboardInterrupt as e:
            # Report how many activations were still pending when cancelled
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            if pbar:
                pbar.close()
                print()
            logger.info(msg)
            error = True
            # Forced clean on failure; the finally block cleans once more
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        except Exception as e:
            error = True
            # Forced clean on failure; the finally block cleans once more
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        finally:
            # Runs on success and on failure: stop the invoker, cancel the
            # pending alarm and close the progress bar
            self.invoker.stop()
            if is_unix_system():
                signal.alarm(0)
            if pbar and not pbar.disable:
                pbar.close()
                if not is_notebook():
                    print()
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)
            # After a failure in a notebook, drop the internally tracked
            # futures that were being waited for (fs not provided)
            if not fs and error and is_notebook():
                del self.futures[len(self.futures) - len(futures):]

        # Re-classify the futures now that the wait has finished
        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]

        return fs_done, fs_notdone

    def get_result(self,
                   fs=None,
                   throw_except=True,
                   timeout=None,
                   THREADPOOL_SIZE=128,
                   WAIT_DUR_SEC=1):
        """
        Wait for the function activations and collect their results.

        Futures that hold nested futures, or that are flagged not to produce
        output, are skipped. When fs is not provided, every internally
        tracked future is read only once: it is marked as read and ignored by
        later calls.

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: The list of results, or the bare result when there is
                 exactly one and the last call was not map()
        """
        completed, _ = self.wait(fs=fs,
                                 throw_except=throw_except,
                                 timeout=timeout,
                                 download_results=True,
                                 THREADPOOL_SIZE=THREADPOOL_SIZE,
                                 WAIT_DUR_SEC=WAIT_DUR_SEC)

        candidates = [f for f in completed
                      if not f.futures and f._produce_output]

        result = []
        for future in candidates:
            if not fs and future._read:
                # Internally stored future that was already consumed
                continue

            result.append(
                future.result(throw_except=throw_except,
                              internal_storage=self.internal_storage))

            if not fs:
                # Only internally stored futures are flagged as read
                future._read = True

        logger.debug("ExecutorID {} Finished getting results".format(
            self.executor_id))

        single_result = len(result) == 1 and self.last_call != 'map'
        return result[0] if single_result else result

    def plot(self, fs=None, dst=None):
        """
        Creates the timeline and histogram plots of the current execution.

        :param fs: future or list of futures to plot. When empty/None, the
                   futures stored in the executor are used.
        :param dst: destination forwarded as-is to ``create_timeline`` and
                    ``create_histogram``.
        """
        ftrs = fs if fs else self.futures

        # isinstance (rather than an exact type comparison) so that list
        # subclasses are treated as a list of futures and not wrapped again
        if not isinstance(ftrs, list):
            ftrs = [ftrs]

        # Only futures that finished without error can be plotted
        ftrs_to_plot = [f for f in ftrs if (f.ready or f.done) and not f.error]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'.format(
                self.executor_id))
            return

        # Lower matplotlib verbosity before the plotting module is imported
        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info('ExecutorID {} - Creating execution plots'.format(
            self.executor_id))

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs=None,
              cs=None,
              clean_cloudobjects=True,
              spawn_cleaner=True,
              force=False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        The method does not delete anything itself: it pickles a description of
        what has to be removed into CLEANER_DIR and, optionally, launches the
        ``lithops.scripts.cleaner`` module in a separate process.

        :param fs: future or list of futures to clean. When empty/None, the
                   futures stored in the executor are used.
        :param cs: iterable of cloudobjects to clean
        :param clean_cloudobjects: true/false, stored in the cleaning record
        :param spawn_cleaner: true/false, whether to launch the cleaner process
        :param force: when true, the job of every future is selected, regardless
                      of its executor id and of whether it is done
        """

        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            # delete=False: the file must remain on disk after it is closed
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                # NOTE: this returns before the spawn step below, so no
                # cleaner process is started by this call.
                return

        futures = fs or self.futures
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are treated as a list of futures and not wrapped again
        if not isinstance(futures, list):
            futures = [futures]

        # NOTE(review): the single '-' check presumably filters by how the
        # executor id was generated - confirm against create_executor_id.
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        # Never schedule the same job twice
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data".format(
                self.executor_id))
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

            self.compute_handler.clear()

        if (jobs_to_clean or cs) and spawn_cleaner:
            # Argument list instead of a shell string: works even if the
            # interpreter path contains spaces and needs no shell.
            cleaner_cmd = [sys.executable, '-m', 'lithops.scripts.cleaner']
            # The child process receives its own copy of the descriptor, so
            # the parent's handle can be closed as soon as Popen returns.
            with open(CLEANER_LOG_FILE, 'a') as log_file:
                sp.Popen(cleaner_cmd, stdout=log_file, stderr=log_file)

    def dismantle(self):
        """
        Delegate to the ``dismantle`` method of the compute handler.
        """
        handler = self.compute_handler
        handler.dismantle()

    def __exit__(self, exc_type, exc_value, traceback):
        self.invoker.stop()
Ejemplo n.º 4
0
    def __init__(self, type=None, mode=None, config=None, backend=None, storage=None,
                 runtime=None, runtime_memory=None, rabbitmq_monitor=None,
                 workers=None, remote_invoker=None, log_level=None):
        """
        Create a function executor for the requested execution mode.

        :param type: Deprecated alias of ``mode``; only used when ``mode`` is
                     not given.
        :param mode: One of LOCALHOST, SERVERLESS or STANDALONE. When neither
                     ``mode`` nor ``type`` is given, it is read from
                     ``config['lithops']['mode']`` after loading defaults.
        :param config: Configuration; it is deep-copied before being passed to
                       ``default_config``.
        :param backend: Overrides 'backend' in the section of the chosen mode.
        :param storage: Overrides 'storage' in the 'lithops' section.
        :param runtime: Overrides 'runtime' in the section of the chosen mode.
        :param runtime_memory: Overrides 'runtime_memory' (converted to int)
                               in the section of the chosen mode.
        :param rabbitmq_monitor: Overrides 'rabbitmq_monitor' in the 'lithops'
                                 section.
        :param workers: Overrides 'workers' in the 'lithops' section.
        :param remote_invoker: Overrides 'remote_invoker' in the section of
                               the chosen mode.
        :param log_level: When truthy, passed to ``setup_logger``.

        :raises Exception: if the mode is not a supported one, or if the
                           rabbitmq monitor is enabled and 'amqp_url' is not
                           present in the 'rabbitmq' configuration section.
        """
        # 'type' is the legacy name of 'mode'; an explicit 'mode' wins
        mode = mode or type

        if mode is None:
            config = default_config(copy.deepcopy(config))
            mode = config['lithops']['mode']

        if mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS, STANDALONE))

        if log_level:
            setup_logger(log_level)

        if type is not None:
            logger.warning("'type' parameter is deprecated and it will be removed "
                           "in future releases. Use 'mode' parameter instead")

        # Overrides built from the explicit arguments; None means "not set"
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.log_active = logger.getEffectiveLevel() != logging.WARNING
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            # Only the executor whose id has 0 as its second '-' separated
            # field launches the cleaner process at interpreter exit.
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean, spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get('rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_monitor since "
                                "'amqp_url' is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # Build the compute handler and the invoker for the chosen mode.
        # Note that LOCALHOST also uses StandaloneInvoker.
        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            self.invoker = StandaloneInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)

            self.invoker = ServerlessInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'
                    .format(mode.capitalize(), self.executor_id))