Python StandaloneHandler Examples

Programming Language: Python

Namespace/Package Name: lithops.standalone.standalone

Examples at hotexamples.com: 18

Python StandaloneHandler - 18 examples found. These are the top rated real world Python examples of lithops.standalone.standalone.StandaloneHandler extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

StandaloneHandler(14)

clear(3)

dismantle(2)

init(2)

Example #1

Show file

File: utils.py Project: cclauss/lithops

def clean_all(config=None):
    logger.info('Cleaning all Lithops information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    default_executor = config['lithops']['executor']
    if default_executor == 'localhost':
        compute_config = extract_localhost_config(config)
        compute_handler = LocalhostHandler(compute_config)
    elif default_executor == 'serverless':
        compute_config = extract_serverless_config(config)
        compute_handler = ServerlessHandler(compute_config, storage_config)
    elif default_executor == 'standalone':
        compute_config = extract_standalone_config(config)
        compute_handler = StandaloneHandler(compute_config)

    compute_handler.clean()

    # Clean object storage temp dirs
    storage = internal_storage.storage
    clean_bucket(storage, storage_config['bucket'], RUNTIMES_PREFIX, sleep=1)
    clean_bucket(storage, storage_config['bucket'], JOBS_PREFIX, sleep=1)

    # Clean localhost executor temp dirs
    shutil.rmtree(STORAGE_DIR, ignore_errors=True)
    # Clean local lithops cache
    shutil.rmtree(CACHE_DIR, ignore_errors=True)

Example #2

Show file

def clean(mode, config, debug):
    log_level = 'INFO' if not debug else 'DEBUG'
    setup_logger(log_level)
    logger.info('Cleaning all Lithops information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    mode = config['lithops']['mode'] if not mode else mode
    if mode == LOCALHOST:
        compute_config = extract_localhost_config(config)
        compute_handler = LocalhostHandler(compute_config)
    elif mode == SERVERLESS:
        compute_config = extract_serverless_config(config)
        compute_handler = ServerlessHandler(compute_config, storage_config)
    elif mode == STANDALONE:
        compute_config = extract_standalone_config(config)
        compute_handler = StandaloneHandler(compute_config)

    compute_handler.clean()

    # Clean object storage temp dirs
    storage = internal_storage.storage
    clean_bucket(storage, storage_config['bucket'], RUNTIMES_PREFIX, sleep=1)
    clean_bucket(storage, storage_config['bucket'], JOBS_PREFIX, sleep=1)

    # Clean localhost executor temp dirs
    shutil.rmtree(LITHOPS_TEMP_DIR, ignore_errors=True)
    # Clean local lithops cache
    shutil.rmtree(CACHE_DIR, ignore_errors=True)

Example #3

Show file

    def __init__(self, config):
        threading.Thread.__init__(self)
        self.last_usage_time = time.time()

        self.standalone_config = config
        self.auto_dismantle = config['auto_dismantle']
        self.soft_dismantle_timeout = config['soft_dismantle_timeout']
        self.hard_dismantle_timeout = config['hard_dismantle_timeout']
        self.exec_mode = config['exec_mode']

        self.jobs = {}

        with open(SA_DATA_FILE, 'r') as ad:
            instance_data = json.load(ad)

        self.sh = StandaloneHandler(self.standalone_config)
        self.instance = self.sh.backend.get_instance(**instance_data)

        logger.debug(
            "Starting BudgetKeeper for {} ({}), instance ID: {}".format(
                self.instance.name, self.instance.private_ip,
                self.instance.instance_id))
        logger.debug(
            f"Delete {self.instance.name} on dismantle: {self.instance.delete_on_dismantle}"
        )

Example #4

Show file

def create(name, mode, memory, timeout, config):
    setup_logger(logging.DEBUG)
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    mode = config['lithops']['mode'] if not mode else mode
    if mode == SERVERLESS:
        compute_config = extract_serverless_config(config)
        compute_handler = ServerlessHandler(compute_config, storage_config)
        mem = memory if memory else compute_config['runtime_memory']
        to = timeout if timeout else compute_config['runtime_timeout']
        runtime_key = compute_handler.get_runtime_key(name, mem)
        runtime_meta = compute_handler.create_runtime(name, mem, timeout=to)
    elif mode == STANDALONE:
        compute_config = extract_standalone_config(config)
        compute_handler = StandaloneHandler(compute_config)
        runtime_key = compute_handler.get_runtime_key(name)
        runtime_meta = compute_handler.create_runtime(name)
    elif mode == LOCALHOST:
        compute_config = extract_localhost_config(config)
        compute_handler = LocalhostHandler(compute_config)
        runtime_key = compute_handler.get_runtime_key(name)
        runtime_meta = compute_handler.create_runtime(name)
    else:
        raise Exception('Unknown execution mode {}'.format(mode))

    try:
        internal_storage.put_runtime_meta(runtime_key, runtime_meta)
    except Exception:
        raise("Unable to upload 'preinstalled-modules' file into {}".format(internal_storage.backend))

Example #5

Show file

File: keeper.py Project: zhanggbj/lithops

    def __init__(self, config):
        threading.Thread.__init__(self)
        self.last_usage_time = time.time()

        self.standalone_config = config
        self.jobs = {}

        vm_data_file = os.path.join(STANDALONE_INSTALL_DIR, 'access.data')
        with open(vm_data_file, 'r') as ad:
            vm_data = json.load(ad)

        self.instance_name = vm_data['instance_name']
        self.ip_address = vm_data['ip_address']
        self.instance_id = vm_data['instance_id']

        logger.info(
            "Starting BudgetKeeper for {} ({}), instance ID: {}".format(
                self.instance_name, self.ip_address, self.instance_id))

        self.sh = StandaloneHandler(self.standalone_config)

        self.auto_dismantle = self.sh.auto_dismantle
        self.soft_dismantle_timeout = self.sh.soft_dismantle_timeout
        self.hard_dismantle_timeout = self.sh.hard_dismantle_timeout

        self.vm = self.sh.backend.get_vm(self.instance_name)
        self.vm.ip_address = self.ip_address
        self.vm.instance_id = self.instance_id
        self.vm.delete_on_dismantle = False if 'master' in self.instance_name else True

Example #6

Show file

File: cli.py Project: aitorarjona/lithops

def attach(config, backend, start, debug):
    """Create or attach to a SSH session on Lithops master VM"""
    if config:
        config = load_yaml_config(config)

    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)

    config_ow = set_config_ow(backend)
    config = default_config(config, config_ow)

    if config['lithops']['mode'] != STANDALONE:
        raise Exception(
            'lithops attach method is only available for standalone backends')

    compute_config = extract_standalone_config(config)
    compute_handler = StandaloneHandler(compute_config)
    compute_handler.init()

    if start:
        compute_handler.backend.master.start()

    master_ip = compute_handler.backend.master.get_public_ip()
    user = compute_handler.backend.master.ssh_credentials['username']
    key_file = compute_handler.backend.master.ssh_credentials[
        'key_filename'] or '~/.ssh/id_rsa'
    key_file = os.path.abspath(os.path.expanduser(key_file))

    if not os.path.exists(key_file):
        raise Exception(f'Private key file {key_file} does not exists')

    print(f'Got master VM public IP address: {master_ip}')
    print(f'Loading ssh private key from: {key_file}')
    print('Creating SSH Connection to lithops master VM')
    cmd = (
        'ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" '
        f'-i {key_file} {user}@{master_ip}')

    compute_handler.backend.master.wait_ready()

    sp.run(shlex.split(cmd))

Example #7

Show file

def init_keeper():
    global keeper

    config_file = os.path.join(REMOTE_INSTALL_DIR, 'config')
    with open(config_file, 'r') as cf:
        serverfull_config = json.load(cf)
    handler = StandaloneHandler(serverfull_config)

    if handler.auto_dismantle:
        keeper = threading.Thread(target=budget_keeper, args=(handler,))
        keeper.daemon = True
        keeper.start()

Example #8

Show file

def init_keeper():
    global keeper
    global backend_handler

    config_file = os.path.join(REMOTE_INSTALL_DIR, 'config')
    with open(config_file, 'r') as cf:
        standalone_config = json.load(cf)

    backend_handler = StandaloneHandler(standalone_config)
    keeper = threading.Thread(target=budget_keeper)
    keeper.daemon = True
    keeper.start()

Example #9

Show file

File: keeper.py Project: misfithell/lithops

    def __init__(self, config):
        threading.Thread.__init__(self)
        self.last_usage_time = time.time()

        self.standalone_config = config
        self.jobs = {}

        access_data = os.path.join(STANDALONE_INSTALL_DIR, 'access.data')
        with open(access_data, 'r') as ad:
            vsi_details = json.load(ad)
        self.instance_name = vsi_details['instance_name']
        self.ip_address = vsi_details['ip_address']
        self.instance_id = vsi_details['instance_id']

        logger.info(("Starting BudgetKeeper for {} ({}), instance ID: {}"
                     .format(self.instance_name, self.ip_address, self.instance_id)))

        self.standalone_handler = StandaloneHandler(self.standalone_config)
        vsi = self.standalone_handler.backend.create_worker(self.instance_name)
        vsi.ip_address = self.ip_address
        vsi.instance_id = self.instance_id
        vsi.delete_on_dismantle = False if 'master' in self.instance_name else True

Example #10

Show file

def clean(config, mode, backend, storage, debug):
    if config:
        config = load_yaml_config(config)

    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)
    logger.info('Cleaning all Lithops information')

    mode = mode or get_mode(backend, config)
    config_ow = {'lithops': {'mode': mode}}
    if storage:
        config_ow['lithops']['storage'] = storage
    if backend:
        config_ow[mode] = {'backend': backend}
    config = default_config(config, config_ow)

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    mode = config['lithops']['mode'] if not mode else mode
    if mode == LOCALHOST:
        compute_config = extract_localhost_config(config)
        compute_handler = LocalhostHandler(compute_config)
    elif mode == SERVERLESS:
        compute_config = extract_serverless_config(config)
        compute_handler = ServerlessHandler(compute_config, storage_config)
    elif mode == STANDALONE:
        compute_config = extract_standalone_config(config)
        compute_handler = StandaloneHandler(compute_config)

    compute_handler.clean()

    # Clean object storage temp dirs
    storage = internal_storage.storage
    clean_bucket(storage, storage_config['bucket'], RUNTIMES_PREFIX, sleep=1)
    clean_bucket(storage, storage_config['bucket'], JOBS_PREFIX, sleep=1)

    # Clean localhost executor temp dirs
    shutil.rmtree(LITHOPS_TEMP_DIR, ignore_errors=True)
    # Clean local lithops cache
    shutil.rmtree(CACHE_DIR, ignore_errors=True)

Example #11

Show file

class FunctionExecutor:
    """
    Executor abstract class that contains the common logic for the Localhost, Serverless and Standalone executors

    :param mode: Execution mode. One of: localhost, serverless or standalone
    :param config: Settings passed in here will override those in lithops_config
    :param backend: Compute backend to run the functions
    :param storage: Storage backend to store Lithops data
    :param runtime: Name of the runtime to run the functions
    :param runtime_memory: Memory (in MB) to use to run the functions
    :param monitoring: Monitoring system implementation. One of: storage, rabbitmq
    :param max_workers: Max number of parallel workers
    :param worker_processes: Worker granularity, number of concurrent/parallel processes in each worker
    :param remote_invoker: Spawn a function that will perform the actual job invocation (True/False)
    :param log_level: Log level printing (INFO, DEBUG, ...). Set it to None to hide all logs. If this is param is set, all logging params in config are disabled
    """
    def __init__(self,
                 mode: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None,
                 backend: Optional[str] = None,
                 storage: Optional[str] = None,
                 runtime: Optional[str] = None,
                 runtime_memory: Optional[int] = None,
                 monitoring: Optional[str] = None,
                 max_workers: Optional[int] = None,
                 worker_processes: Optional[int] = None,
                 remote_invoker: Optional[bool] = None,
                 log_level: Optional[str] = False):
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # overwrite user-provided parameters
        config_ow = {'lithops': {}, 'backend': {}}
        if runtime is not None:
            config_ow['backend']['runtime'] = runtime
        if runtime_memory is not None:
            config_ow['backend']['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow['backend']['remote_invoker'] = remote_invoker
        if worker_processes is not None:
            config_ow['backend']['worker_processes'] = worker_processes
        if max_workers is not None:
            config_ow['backend']['max_workers'] = max_workers

        if mode is not None:
            config_ow['lithops']['mode'] = mode
        if backend is not None:
            config_ow['lithops']['backend'] = backend
        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        # Load configuration
        self.config = default_config(copy.deepcopy(config), config_ow)

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            atexit.register(self.clean,
                            clean_cloudobjects=False,
                            clean_fn=True)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.backend = self.config['lithops']['backend']
        self.mode = self.config['lithops']['mode']

        if self.mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif self.mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif self.mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      config=self.config)

        # Create the invoker
        self.invoker = create_invoker(config=self.config,
                                      executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      compute_handler=self.compute_handler,
                                      job_monitor=self.job_monitor)

        logger.debug(
            f'Function executor for {self.backend} created with ID: {self.executor_id}'
        )

        self.log_path = None

    def __enter__(self):
        """ Context manager method """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method """
        self.job_monitor.stop()
        self.invoker.stop()
        self.compute_handler.clear()

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func: Callable,
                   data: Union[List[Any], Tuple[Any, ...], Dict[str, Any]],
                   extra_env: Optional[Dict] = None,
                   runtime_memory: Optional[int] = None,
                   timeout: Optional[int] = None,
                   include_modules: Optional[List] = [],
                   exclude_modules: Optional[List] = []) -> ResponseFuture:
        """
        For running one function execution asynchronously.

        :param func: The function to map over the data.
        :param data: Input data. Arguments can be passed as a list or tuple, or as a dictionary for keyword arguments.
        :param extra_env: Additional env variables for function environment.
        :param runtime_memory: Memory to use to run the function.
        :param timeout: Time that the function has to complete its execution before raising a timeout.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: Response future.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function: Callable,
            map_iterdata: List[Union[List[Any], Tuple[Any, ...], Dict[str,
                                                                      Any]]],
            chunksize: Optional[int] = None,
            extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                       Dict[str, Any]]] = None,
            extra_env: Optional[Dict[str, str]] = None,
            runtime_memory: Optional[int] = None,
            obj_chunk_size: Optional[int] = None,
            obj_chunk_number: Optional[int] = None,
            timeout: Optional[int] = None,
            include_modules: Optional[List[str]] = [],
            exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Spawn multiple function activations based on the items of an input list.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data (e.g python list).
        :param chunksize: Split map_iteradata in chunks of this size. Lithops spawns 1 worker per resulting chunk
        :param extra_args: Additional arguments to pass to each map_function activation
        :param extra_env: Additional environment variables for function environment
        :param runtime_memory: Memory (in MB) to use to run the functions
        :param obj_chunk_size: Used for data processing. Chunk size to split each object in bytes. Must be >= 1MiB. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Used for data processing. Number of chunks to split each object. 'None' for processing the whole file in one function activation. chunk_n has prevalence over chunk_size if both parameters are set
        :param timeout: Max time per function activation (seconds)
        :param include_modules: Explicitly pickle these dependencies. All required dependencies are pickled if default empty list. No one dependency is pickled if it is explicitly set to None
        :param exclude_modules: Explicitly keep these modules from pickled dependencies. It is not taken into account if you set include_modules.

        :return: A list with size `len(map_iterdata)` of futures for each job (Futures are also internally stored by Lithops).
        """

        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        return create_futures_list(futures, self)

    def map_reduce(self,
                   map_function: Callable,
                   map_iterdata: List[Union[List[Any], Tuple[Any, ...],
                                            Dict[str, Any]]],
                   reduce_function: Callable,
                   chunksize: Optional[int] = None,
                   extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                              Dict[str, Any]]] = None,
                   extra_env: Optional[Dict[str, str]] = None,
                   map_runtime_memory: Optional[int] = None,
                   reduce_runtime_memory: Optional[int] = None,
                   obj_chunk_size: Optional[int] = None,
                   obj_chunk_number: Optional[int] = None,
                   timeout: Optional[int] = None,
                   reducer_one_per_object: Optional[bool] = False,
                   spawn_reducer: Optional[int] = 20,
                   include_modules: Optional[List[str]] = [],
                   exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Map the map_function over the data and apply the reduce_function across all futures.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: The function to reduce over the futures
        :param chunksize: Split map_iteradata in chunks of this size. Lithops spawns 1 worker per resulting chunk. Default 1
        :param extra_args: Additional arguments to pass to function activation. Default None
        :param extra_env: Additional environment variables for action environment. Default None
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config)
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config)
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their execution before raising a timeout
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param spawn_reducer: Percentage of done map functions before spawning the reduce function
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(config=self.config,
                                 internal_storage=self.internal_storage,
                                 executor_id=self.executor_id,
                                 job_id=map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        if spawn_reducer != ALWAYS:
            self.wait(map_futures, return_when=spawn_reducer)
            logger.debug(
                f'ExecutorID {self.executor_id} | JobID {map_job_id} - '
                f'{spawn_reducer}% of map activations done. Spawning reduce stage'
            )

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            config=self.config,
            internal_storage=self.internal_storage,
            executor_id=self.executor_id,
            reduce_job_id=reduce_job_id,
            reduce_function=reduce_function,
            map_job=map_job,
            map_futures=map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return create_futures_list(map_futures + reduce_futures, self)

    def wait(
        self,
        fs: Optional[Union[ResponseFuture, FuturesList,
                           List[ResponseFuture]]] = None,
        throw_except: Optional[bool] = True,
        return_when: Optional[Any] = ALL_COMPLETED,
        download_results: Optional[bool] = False,
        timeout: Optional[int] = None,
        threadpool_size: Optional[int] = THREADPOOL_SIZE,
        wait_dur_sec: Optional[int] = WAIT_DUR_SEC
    ) -> Tuple[FuturesList, FuturesList]:
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True
        :param return_when: Percentage of done futures
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check

        :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures that have completed and `fs_notdone` is a list of futures that have not completed.
        """
        futures = fs or self.futures
        if type(futures) != list and type(futures) != FuturesList:
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)

            if self.data_cleaner and return_when == ALL_COMPLETED:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False)

        except (KeyboardInterrupt, Exception) as e:
            self.invoker.stop()
            self.job_monitor.stop()
            if not fs and is_notebook():
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner:
                present_jobs = {f.job_key for f in futures}
                self.compute_handler.clear(present_jobs)
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures if not f.success and not f.done]

        return create_futures_list(fs_done, self), create_futures_list(
            fs_notdone, self)

    def get_result(self,
                   fs: Optional[Union[ResponseFuture, FuturesList,
                                      List[ResponseFuture]]] = None,
                   throw_except: Optional[bool] = True,
                   timeout: Optional[int] = None,
                   threadpool_size: Optional[int] = THREADPOOL_SIZE,
                   wait_dur_sec: Optional[int] = WAIT_DUR_SEC):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param threadpool_size: Number of threads to use. Default 128
        :param wait_dur_sec: Time interval between each check.
        :return: The result of the future/s
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug(
            f'ExecutorID {self.executor_id} - Finished getting results')

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self,
             fs: Optional[Union[ResponseFuture, List[ResponseFuture],
                                FuturesList]] = None,
             dst: Optional[str] = None):
        """
        Creates timeline and histogram of the current execution in dst_dir.

        :param fs: list of futures.
        :param dst: destination path to save .png plots.
        """
        ftrs = self.futures if not fs else fs

        if isinstance(ftrs, ResponseFuture):
            ftrs = [ftrs]

        ftrs_to_plot = [
            f for f in ftrs if (f.success or f.done) and not f.error
        ]

        if not ftrs_to_plot:
            logger.debug(
                f'ExecutorID {self.executor_id} - No futures ready to plot')
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info(
            f'ExecutorID {self.executor_id} - Creating execution plots')

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs: Optional[Union[ResponseFuture, List[ResponseFuture]]] = None,
              cs: Optional[List[CloudObject]] = None,
              clean_cloudobjects: Optional[bool] = True,
              clean_fn: Optional[bool] = False,
              force: Optional[bool] = False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        :param fs: List of futures to clean
        :param cs: List of cloudobjects to clean
        :param clean_cloudobjects: Delete all cloudobjects created with this executor
        :param clan_fn: Delete cached functions in this executor
        :param force: Clean all future objects even if they have not benn completed
        """
        global CLEANER_PROCESS

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        if clean_fn:
            data = {
                'fn_to_clean': self.executor_id,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)

        futures = fs or self.futures
        futures = [futures] if type(futures) != list else futures
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info(
                f'ExecutorID {self.executor_id} - Cleaning temporary data')
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

        spawn_cleaner = not (CLEANER_PROCESS
                             and CLEANER_PROCESS.poll() is None)
        if (jobs_to_clean or cs) and spawn_cleaner:
            cmd = [sys.executable, '-m', 'lithops.scripts.cleaner']
            CLEANER_PROCESS = sp.Popen(cmd, start_new_session=True)

    def job_summary(self, cloud_objects_n: Optional[int] = 0):
        """
        Logs information of a job executed by the calling function executor.
        currently supports: code_engine, ibm_vpc and ibm_cf.

        :param cloud_objects_n: number of cloud object used in COS, declared by user.
        """
        import pandas as pd
        import numpy as np

        def init():
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file."""
            pd.DataFrame(content).to_csv(self.log_path,
                                         mode='a',
                                         header=False,
                                         index=False)

        def append_summary():
            """ add a summary row to the log file"""
            df = pd.read_csv(self.log_path)
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ',
                df.Invocations.sum(), df['Memory(MB)'].sum(),
                round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path,
                             mode='a',
                             header=False,
                             index=False)

        def get_object_num():
            """returns cloud objects used up to this point, using this function executor. """
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        # Avoid logging info unless chosen computational backend is supported.
        if hasattr(self.compute_handler.backend, 'calc_cost'):

            if self.log_path:  # retrieve cloud_objects_n from last log file
                cloud_objects_n += get_object_num()
            else:
                self.log_path = os.path.join(
                    constants.LOGS_DIR,
                    datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))
            # override current logfile
            init()

            futures = self.futures
            if type(futures) != list:
                futures = [futures]

            memory = []
            runtimes = []
            curr_job_id = futures[0].job_id
            job_func = futures[
                0].function_name  # each job is conducted on a single function

            for future in futures:
                if curr_job_id != future.job_id:
                    cost = self.compute_handler.backend.calc_cost(
                        runtimes, memory)
                    append([[
                        curr_job_id, job_func,
                        len(runtimes),
                        sum(memory),
                        np.round(np.average(runtimes), 10), cost, ' '
                    ]])

                    # updating next iteration's variables:
                    curr_job_id = future.job_id
                    job_func = future.function_name
                    memory.clear()
                    runtimes.clear()

                memory.append(future.runtime_memory)
                runtimes.append(future.stats['worker_exec_time'])

            # appends last Job-ID
            cost = self.compute_handler.backend.calc_cost(runtimes, memory)
            append([[
                curr_job_id, job_func,
                len(runtimes),
                sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])
            # append summary row to end of the dataframe
            append_summary()

        else:  # calc_cost() doesn't exist for chosen computational backend.
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(self.compute_handler.backend.name))
            return
        logger.info("View log file logs at {}".format(self.log_path))

Example #12

Show file

File: executors.py Project: syllogy/lithops

    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 monitoring=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """ Create a FunctionExecutor Class """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        monitoring_backend = self.config['lithops']['monitoring'].lower()
        monitoring_config = self.config.get(monitoring_backend)
        self.job_monitor = JobMonitor(monitoring_backend, monitoring_config)

        # Create the invokder
        self.invoker = create_invoker(self.config, self.executor_id,
                                      self.internal_storage,
                                      self.compute_handler, self.job_monitor)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

        self.log_path = None

Example #13

Show file

File: executors.py Project: syllogy/lithops

class FunctionExecutor:
    """
    Executor abstract class that contains the common logic
    for the Localhost, Serverless and Standalone executors
    """
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 monitoring=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """ Create a FunctionExecutor Class """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        monitoring_backend = self.config['lithops']['monitoring'].lower()
        monitoring_config = self.config.get(monitoring_backend)
        self.job_monitor = JobMonitor(monitoring_backend, monitoring_config)

        # Create the invokder
        self.invoker = create_invoker(self.config, self.executor_id,
                                      self.internal_storage,
                                      self.compute_handler, self.job_monitor)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

        self.log_path = None

    def __enter__(self):
        """ Context manager method """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Context manager method """
        self.job_monitor.stop()
        self.invoker.stop()

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   runtime_memory=None,
                   timeout=None,
                   include_modules=[],
                   exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function,
            map_iterdata,
            chunksize=None,
            worker_processes=None,
            extra_args=None,
            extra_env=None,
            runtime_memory=None,
            chunk_size=None,
            chunk_n=None,
            obj_chunk_size=None,
            obj_chunk_number=None,
            timeout=None,
            invoke_pool_threads=None,
            include_modules=[],
            exclude_modules=[]):
        """
        For running multiple function executions asynchronously

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param obj_chunk_size: the size of the data chunks to split each object.
                           'None' for processing the whole file in one function
                           activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for
                                 processing the whole file in one function activation
        :param remote_invocation: Enable or disable remote_invocation mechanism
        :param timeout: Time that the functions have to complete their execution
                        before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunksize=None,
                   worker_processes=None,
                   extra_args=None,
                   extra_env=None,
                   map_runtime_memory=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   reduce_runtime_memory=None,
                   chunk_size=None,
                   chunk_n=None,
                   timeout=None,
                   invoke_pool_threads=None,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata:  An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker Default 1
        :param reduce_function:  the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_args: Additional arguments to pass to function activation. Default None.
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing
                               the whole file in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole
                                 file in one function activation.
        :param remote_invocation: Enable or disable remote_invocation mechanism. Default 'False'
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(self.config,
                                 self.internal_storage,
                                 self.executor_id,
                                 map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 worker_processes=worker_processes,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 chunk_size=chunk_size,
                                 chunk_n=chunk_n,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            wait(fs=map_futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            self.config,
            self.internal_storage,
            self.executor_id,
            reduce_job_id,
            reduce_function,
            map_job,
            map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures

    def wait(self,
             fs=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             download_results=False,
             timeout=None,
             threadpool_size=THREADPOOL_SIZE,
             wait_dur_sec=WAIT_DUR_SEC):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param threadpool_size: Number of threads to use. Default 64
        :param wait_dur_sec: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = fs or self.futures
        if type(futures) != list:
            futures = [futures]

        # Start waiting for results
        try:
            wait(fs=futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor,
                 download_results=download_results,
                 throw_except=throw_except,
                 return_when=return_when,
                 timeout=timeout,
                 threadpool_size=threadpool_size,
                 wait_dur_sec=wait_dur_sec)

        except Exception as e:
            self.invoker.stop()
            if not fs and is_notebook():
                del self.futures[len(self.futures) - len(futures):]
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        finally:
            present_jobs = {f.job_key for f in futures}
            self.job_monitor.stop(present_jobs)
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.success or f.done]
            fs_notdone = [f for f in futures if not f.success and not f.done]

        return fs_done, fs_notdone

    def get_result(self,
                   fs=None,
                   throw_except=True,
                   timeout=None,
                   threadpool_size=THREADPOOL_SIZE,
                   wait_dur_sec=WAIT_DUR_SEC):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param verbose: Shows some information prints. Default False
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.
        :return: The result of the future/s
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               threadpool_size=threadpool_size,
                               wait_dur_sec=wait_dur_sec)
        result = []
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug("ExecutorID {} Finished getting results".format(
            self.executor_id))

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self, fs=None, dst=None):
        """
        Creates timeline and histogram of the current execution in dst_dir.

        :param dst_dir: destination folder to save .png plots.
        :param dst_file_name: prefix name of the file.
        :param fs: list of futures.
        """
        ftrs = self.futures if not fs else fs

        if type(ftrs) != list:
            ftrs = [ftrs]

        ftrs_to_plot = [
            f for f in ftrs if (f.success or f.done) and not f.error
        ]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'.format(
                self.executor_id))
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info('ExecutorID {} - Creating execution plots'.format(
            self.executor_id))

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs=None,
              cs=None,
              clean_cloudobjects=True,
              spawn_cleaner=True,
              force=False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        :param fs: list of futures to clean
        :param cs: list of cloudobjects to clean
        :param clean_cloudobjects: true/false
        :param spawn_cleaner true/false
        """

        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        futures = fs or self.futures
        futures = [futures] if type(futures) != list else futures
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data".format(
                self.executor_id))
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

            self.compute_handler.clear()

        if (jobs_to_clean or cs) and spawn_cleaner:
            log_file = open(CLEANER_LOG_FILE, 'a')
            cmdstr = [sys.executable, '-m', 'lithops.scripts.cleaner']
            sp.Popen(' '.join(cmdstr),
                     shell=True,
                     stdout=log_file,
                     stderr=log_file)

    def job_summary(self, cloud_objects_n=0):
        """
        logs information of a job executed by the calling function executor.
        currently supports: code_engine, ibm_vpc and ibm_cf.
        on future commits, support will extend to code_engine and ibm_vpc :

        :param cloud_objects_n: number of cloud object used in COS, declared by user.
        """
        import pandas as pd

        def init():
            headers = [
                'Job_ID', 'Function', 'Invocations', 'Memory(MB)',
                'AvgRuntime', 'Cost', 'CloudObjects'
            ]
            pd.DataFrame([], columns=headers).to_csv(self.log_path,
                                                     index=False)

        def append(content):
            """ appends job information to log file."""
            pd.DataFrame(content).to_csv(self.log_path,
                                         mode='a',
                                         header=False,
                                         index=False)

        def append_summary():
            """ add a summary row to the log file"""
            df = pd.read_csv(self.log_path)
            total_average = sum(
                df.AvgRuntime * df.Invocations) / df.Invocations.sum()
            total_row = pd.DataFrame([[
                'Summary', ' ',
                df.Invocations.sum(), df['Memory(MB)'].sum(),
                round(total_average, 10),
                df.Cost.sum(), cloud_objects_n
            ]])
            total_row.to_csv(self.log_path,
                             mode='a',
                             header=False,
                             index=False)

        def get_object_num():
            """returns cloud objects used up to this point, using this function executor. """
            df = pd.read_csv(self.log_path)
            return float(df.iloc[-1].iloc[-1])

        # Avoid logging info unless chosen computational backend is supported.
        if hasattr(self.compute_handler.backend, 'calc_cost'):

            if self.log_path:  # retrieve cloud_objects_n from last log file
                cloud_objects_n += get_object_num()
            else:
                self.log_path = os.path.join(
                    constants.LOGS_DIR,
                    datetime.now().strftime("%Y-%m-%d_%H:%M:%S.csv"))
            # override current logfile
            init()

            futures = self.futures
            if type(futures) != list:
                futures = [futures]

            memory = []
            runtimes = []
            curr_job_id = futures[0].job_id
            job_func = futures[
                0].function_name  # each job is conducted on a single function

            for future in futures:
                if curr_job_id != future.job_id:
                    cost = self.compute_handler.backend.calc_cost(
                        runtimes, memory)
                    append([[
                        curr_job_id, job_func,
                        len(runtimes),
                        sum(memory),
                        np.round(np.average(runtimes), 10), cost, ' '
                    ]])

                    # updating next iteration's variables:
                    curr_job_id = future.job_id
                    job_func = future.function_name
                    memory.clear()
                    runtimes.clear()

                memory.append(future.runtime_memory)
                runtimes.append(future.stats['worker_exec_time'])

            # appends last Job-ID
            cost = self.compute_handler.backend.calc_cost(runtimes, memory)
            append([[
                curr_job_id, job_func,
                len(runtimes),
                sum(memory),
                np.round(np.average(runtimes), 10), cost, ' '
            ]])
            # append summary row to end of the dataframe
            append_summary()

        else:  # calc_cost() doesn't exist for chosen computational backend.
            logger.warning(
                "Could not log job: {} backend isn't supported by this function."
                .format(self.compute_handler.backend.name))
            return
        logger.info("View log file logs at {}".format(self.log_path))

Example #14

Show file

class FunctionExecutor:
    """
    Executor abstract class that contains the common logic
    for the Localhost, Serverless and Standalone executors
    """
    def __init__(self,
                 mode=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 rabbitmq_monitor=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=False):
        """ Create a FunctionExecutor Class """
        if mode and mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS,
                                             STANDALONE))

        self.is_lithops_worker = is_lithops_worker()

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        self.setup_progressbar = (not self.is_lithops_worker
                                  and log_level is not None and
                                  logger.getEffectiveLevel() == logging.INFO)

        # load mode of execution
        mode = mode or get_mode(backend, config)
        config_ow = {'lithops': {'mode': mode}, mode: {}}

        # overwrite user-provided parameters
        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean,
                            spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_mnonitor since "
                                "'amqp_url' is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)

            if self.config[mode].get('customized_runtime'):
                self.invoker = CustomizedRuntimeInvoker(
                    self.config, self.executor_id, self.internal_storage,
                    self.compute_handler)
            else:
                self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                                 self.internal_storage,
                                                 self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'.format(
            mode.capitalize(), self.executor_id))

    def __enter__(self):
        return self

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   runtime_memory=None,
                   timeout=None,
                   include_modules=[],
                   exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run(job)
        self.futures.extend(futures)

        return futures[0]

    def map(self,
            map_function,
            map_iterdata,
            chunksize=None,
            worker_processes=None,
            extra_args=None,
            extra_env=None,
            runtime_memory=None,
            chunk_size=None,
            chunk_n=None,
            obj_chunk_size=None,
            obj_chunk_number=None,
            timeout=None,
            invoke_pool_threads=None,
            include_modules=[],
            exclude_modules=[]):
        """
        For running multiple function executions asynchronously

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param obj_chunk_size: the size of the data chunks to split each object.
                           'None' for processing the whole file in one function
                           activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for
                                 processing the whole file in one function activation
        :param remote_invocation: Enable or disable remote_invocation mechanism
        :param timeout: Time that the functions have to complete their execution
                        before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run(job)
        self.futures.extend(futures)

        return futures

    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunksize=None,
                   worker_processes=None,
                   extra_args=None,
                   extra_env=None,
                   map_runtime_memory=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   reduce_runtime_memory=None,
                   chunk_size=None,
                   chunk_n=None,
                   timeout=None,
                   invoke_pool_threads=None,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata:  An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker Default 1
        :param reduce_function:  the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_args: Additional arguments to pass to function activation. Default None.
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing
                               the whole file in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole
                                 file in one function activation.
        :param remote_invocation: Enable or disable remote_invocation mechanism. Default 'False'
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(self.config,
                                 self.internal_storage,
                                 self.executor_id,
                                 map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 worker_processes=worker_processes,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 chunk_size=chunk_size,
                                 chunk_n=chunk_n,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            self.wait(fs=map_futures)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            self.config,
            self.internal_storage,
            self.executor_id,
            reduce_job_id,
            reduce_function,
            map_job,
            map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run(reduce_job)

        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures

    def wait(self,
             fs=None,
             throw_except=True,
             return_when=ALL_COMPLETED,
             download_results=False,
             timeout=None,
             THREADPOOL_SIZE=128,
             WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 64
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = fs or self.futures
        if type(futures) != list:
            futures = [futures]

        if not futures:
            raise Exception(
                'You must run the call_async(), map() or map_reduce(), or provide'
                ' a list of futures before calling the wait()/get_result() method'
            )

        if download_results:
            msg = 'ExecutorID {} - Getting results'.format(self.executor_id)
            fs_done = [f for f in futures if f.done]
            fs_not_done = [f for f in futures if not f.done]
            fs_not_ready = [f for f in futures if not f.ready]

        else:
            msg = 'ExecutorID {} - Waiting for functions to complete'.format(
                self.executor_id)
            fs_done = [f for f in futures if f.ready or f.done]
            fs_not_done = [f for f in futures if not f.ready and not f.done]
            fs_not_ready = [f for f in futures if not f.ready]

        if not fs_not_done:
            return fs_done, fs_not_done

        logger.info(msg)

        if is_unix_system() and timeout is not None:
            logger.debug(
                'Setting waiting timeout to {} seconds'.format(timeout))
            error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(
                timeout)
            signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
            signal.alarm(timeout)

        pbar = None
        error = False

        if not self.is_lithops_worker and self.setup_progressbar and fs_not_ready:
            from tqdm.auto import tqdm

            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                            total=len(fs_not_done))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                            total=len(fs_not_done),
                            disable=None)

        try:
            if self.rabbitmq_monitor:
                logger.debug('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures,
                              self.internal_storage,
                              rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results,
                              throw_except=throw_except,
                              pbar=pbar,
                              return_when=return_when,
                              THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures,
                             self.internal_storage,
                             download_results=download_results,
                             throw_except=throw_except,
                             return_when=return_when,
                             pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE,
                             WAIT_DUR_SEC=WAIT_DUR_SEC)

        except KeyboardInterrupt as e:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            if pbar:
                pbar.close()
                print()
            logger.info(msg)
            error = True
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        except Exception as e:
            error = True
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False, force=True)
            raise e

        finally:
            self.invoker.stop()
            if is_unix_system():
                signal.alarm(0)
            if pbar and not pbar.disable:
                pbar.close()
                if not is_notebook():
                    print()
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)
            if not fs and error and is_notebook():
                del self.futures[len(self.futures) - len(futures):]

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]

        return fs_done, fs_notdone

    def get_result(self,
                   fs=None,
                   throw_except=True,
                   timeout=None,
                   THREADPOOL_SIZE=128,
                   WAIT_DUR_SEC=1):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param verbose: Shows some information prints. Default False
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: The result of the future/s
        """
        fs_done, _ = self.wait(fs=fs,
                               throw_except=throw_except,
                               timeout=timeout,
                               download_results=True,
                               THREADPOOL_SIZE=THREADPOOL_SIZE,
                               WAIT_DUR_SEC=WAIT_DUR_SEC)
        result = []
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(
                    f.result(throw_except=throw_except,
                             internal_storage=self.internal_storage))
                f._read = True

        logger.debug("ExecutorID {} Finished getting results".format(
            self.executor_id))

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self, fs=None, dst=None):
        """
        Creates timeline and histogram of the current execution in dst_dir.

        :param dst_dir: destination folder to save .png plots.
        :param dst_file_name: prefix name of the file.
        :param fs: list of futures.
        """
        ftrs = self.futures if not fs else fs

        if type(ftrs) != list:
            ftrs = [ftrs]

        ftrs_to_plot = [f for f in ftrs if (f.ready or f.done) and not f.error]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'.format(
                self.executor_id))
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        logger.info('ExecutorID {} - Creating execution plots'.format(
            self.executor_id))

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self,
              fs=None,
              cs=None,
              clean_cloudobjects=True,
              spawn_cleaner=True,
              force=False):
        """
        Deletes all the temp files from storage. These files include the function,
        the data serialization and the function invocation results. It can also clean
        cloudobjects.

        :param fs: list of futures to clean
        :param cs: list of cloudobjects to clean
        :param clean_cloudobjects: true/false
        :param spawn_cleaner true/false
        """

        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR,
                                             delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {
                'cos_to_clean': list(cs),
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            if not fs:
                return

        futures = fs or self.futures
        futures = [futures] if type(futures) != list else futures
        present_jobs = {
            create_job_key(f.executor_id, f.job_id)
            for f in futures
            if (f.executor_id.count('-') == 1 and f.done) or force
        }
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data".format(
                self.executor_id))
            data = {
                'jobs_to_clean': jobs_to_clean,
                'clean_cloudobjects': clean_cloudobjects,
                'storage_config': self.internal_storage.get_storage_config()
            }
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

            self.compute_handler.clear()

        if (jobs_to_clean or cs) and spawn_cleaner:
            log_file = open(CLEANER_LOG_FILE, 'a')
            cmdstr = '{} -m lithops.scripts.cleaner'.format(sys.executable)
            sp.Popen(cmdstr, shell=True, stdout=log_file, stderr=log_file)

    def dismantle(self):
        self.compute_handler.dismantle()

    def __exit__(self, exc_type, exc_value, traceback):
        self.invoker.stop()

Example #15

Show file

    def __init__(self,
                 type=None,
                 config=None,
                 backend=None,
                 storage=None,
                 runtime=None,
                 runtime_memory=None,
                 rabbitmq_monitor=None,
                 workers=None,
                 remote_invoker=None,
                 log_level=None):

        if type is None:
            config = default_config(copy.deepcopy(config))
            type = config['lithops']['executor']

        if log_level:
            default_logging_config(log_level)

        config_ow = {'lithops': {'executor': type}, type: {}}

        if runtime is not None:
            config_ow[type]['runtime'] = runtime
        if backend is not None:
            config_ow[type]['backend'] = backend
        if runtime_memory is not None:
            config_ow[type]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[type]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.log_active = logger.getEffectiveLevel() != logging.WARNING
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        self.rabbitmq_monitor = self.config['lithops'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_mnonitor since "
                                "'amqp_url' is not present in configuration")

        self.storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(self.storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.total_jobs = 0
        self.cleaned_jobs = set()
        self.last_call = None

        if type == 'localhost':
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif type == 'serverless':
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.storage_config)

            self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif type == 'standalone':
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        else:
            raise Exception("Function executor type must be one of "
                            "'localhost', 'serverless' or 'standalone'")

        logger.info('{} Executor created with ID: {}'.format(
            type.capitalize(), self.executor_id))

Example #16

Show file

File: executors.py Project: otrack/lithops

    def __init__(self, type=None, mode=None, config=None, backend=None, storage=None,
                 runtime=None, runtime_memory=None, rabbitmq_monitor=None,
                 workers=None, remote_invoker=None, log_level=None):

        mode = mode or type

        if mode is None:
            config = default_config(copy.deepcopy(config))
            mode = config['lithops']['mode']

        if mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS, STANDALONE))

        if log_level:
            setup_logger(log_level)

        if type is not None:
            logger.warning("'type' parameter is deprecated and it will be removed"
                           "in future releases. Use 'mode' parameter instead")

        config_ow = {'lithops': {'mode': mode}, mode: {}}

        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)

        self.log_active = logger.getEffectiveLevel() != logging.WARNING
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean, spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get('rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_mnonitor since "
                                "'amqp_url' is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)

            self.invoker = StandaloneInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)

            self.invoker = ServerlessInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

            self.invoker = StandaloneInvoker(self.config,
                                             self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'
                    .format(mode.capitalize(), self.executor_id))

Example #17

Show file

    def __init__(self,
                 mode: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None,
                 backend: Optional[str] = None,
                 storage: Optional[str] = None,
                 runtime: Optional[str] = None,
                 runtime_memory: Optional[int] = None,
                 monitoring: Optional[str] = None,
                 max_workers: Optional[int] = None,
                 worker_processes: Optional[int] = None,
                 remote_invoker: Optional[bool] = None,
                 log_level: Optional[str] = False):
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()
        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        # setup lithops logging
        if not self.is_lithops_worker:
            # if is lithops worker, logging has been set up in entry_point.py
            if log_level:
                setup_lithops_logger(log_level)
            elif log_level is False and logger.getEffectiveLevel(
            ) == logging.WARNING:
                # Set default logging from config
                setup_lithops_logger(*get_log_info(config))

        # overwrite user-provided parameters
        config_ow = {'lithops': {}, 'backend': {}}
        if runtime is not None:
            config_ow['backend']['runtime'] = runtime
        if runtime_memory is not None:
            config_ow['backend']['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow['backend']['remote_invoker'] = remote_invoker
        if worker_processes is not None:
            config_ow['backend']['worker_processes'] = worker_processes
        if max_workers is not None:
            config_ow['backend']['max_workers'] = max_workers

        if mode is not None:
            config_ow['lithops']['mode'] = mode
        if backend is not None:
            config_ow['lithops']['backend'] = backend
        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if monitoring is not None:
            config_ow['lithops']['monitoring'] = monitoring

        # Load configuration
        self.config = default_config(copy.deepcopy(config), config_ow)

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            atexit.register(self.clean,
                            clean_cloudobjects=False,
                            clean_fn=True)

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.backend = self.config['lithops']['backend']
        self.mode = self.config['lithops']['mode']

        if self.mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
        elif self.mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     self.internal_storage)
        elif self.mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)

        # Create the monitoring system
        self.job_monitor = JobMonitor(executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      config=self.config)

        # Create the invoker
        self.invoker = create_invoker(config=self.config,
                                      executor_id=self.executor_id,
                                      internal_storage=self.internal_storage,
                                      compute_handler=self.compute_handler,
                                      job_monitor=self.job_monitor)

        logger.debug(
            f'Function executor for {self.backend} created with ID: {self.executor_id}'
        )

        self.log_path = None

Example #18

Show file

File: keeper.py Project: misfithell/lithops

class BudgetKeeper(threading.Thread):
    """
    BudgetKeeper class used to automatically stop the VM instance
    """
    def __init__(self, config):
        threading.Thread.__init__(self)
        self.last_usage_time = time.time()

        self.standalone_config = config
        self.jobs = {}

        access_data = os.path.join(STANDALONE_INSTALL_DIR, 'access.data')
        with open(access_data, 'r') as ad:
            vsi_details = json.load(ad)
        self.instance_name = vsi_details['instance_name']
        self.ip_address = vsi_details['ip_address']
        self.instance_id = vsi_details['instance_id']

        logger.info(("Starting BudgetKeeper for {} ({}), instance ID: {}"
                     .format(self.instance_name, self.ip_address, self.instance_id)))

        self.standalone_handler = StandaloneHandler(self.standalone_config)
        vsi = self.standalone_handler.backend.create_worker(self.instance_name)
        vsi.ip_address = self.ip_address
        vsi.instance_id = self.instance_id
        vsi.delete_on_dismantle = False if 'master' in self.instance_name else True

    def update_config(self, config):
        self.standalone_config.update(config)
        self.standalone_handler.auto_dismantle = config['auto_dismantle']
        self.standalone_handler.soft_dismantle_timeout = config['soft_dismantle_timeout']
        self.standalone_handler.hard_dismantle_timeout = config['hard_dismantle_timeout']

    def run(self):
        runing = True
        jobs_running = False

        logger.info("BudgetKeeper started")

        if self.standalone_handler.auto_dismantle:
            logger.info('Auto dismantle activated - Soft timeout: {}s, Hard Timeout: {}s'
                        .format(self.standalone_handler.soft_dismantle_timeout,
                                self.standalone_handler.hard_dismantle_timeout))
        else:
            # If auto_dismantle is deactivated, the VM will be always automatically
            # stopped after hard_dismantle_timeout. This will prevent the VM
            # being started forever due a wrong configuration
            logger.info('Auto dismantle deactivated - Hard Timeout: {}s'
                        .format(self.standalone_handler.hard_dismantle_timeout))

        while runing:
            time_since_last_usage = time.time() - self.last_usage_time
            check_interval = self.standalone_handler.soft_dismantle_timeout / 10
            for job_key in self.jobs.keys():
                done = os.path.join(JOBS_DIR, job_key+'.done')
                if os.path.isfile(done):
                    self.jobs[job_key] = 'done'
            if len(self.jobs) > 0 and all(value == 'done' for value in self.jobs.values()) \
               and self.standalone_handler.auto_dismantle:

                # here we need to catch a moment when number of running JOBS become zero.
                # when it happens we reset countdown back to soft_dismantle_timeout
                if jobs_running:
                    jobs_running = False
                    self.last_usage_time = time.time()
                    time_since_last_usage = time.time() - self.last_usage_time

                time_to_dismantle = int(self.standalone_handler.soft_dismantle_timeout - time_since_last_usage)
            else:
                time_to_dismantle = int(self.standalone_handler.hard_dismantle_timeout - time_since_last_usage)
                jobs_running = True

            if time_to_dismantle > 0:
                logger.info("Time to dismantle: {} seconds".format(time_to_dismantle))
                time.sleep(check_interval)
            else:
                logger.info("Dismantling setup")
                try:
                    self.standalone_handler.dismantle()
                    runing = False
                except Exception as e:
                    logger.info("Dismantle error {}".format(e))