Example #1
    def __init__(self, pywren_config):
        self.config = pywren_config
        self.rabbitmq_monitor = self.config['pywren'].get(
            'rabbitmq_monitor', False)
        self.store_status = strtobool(os.environ.get('STORE_STATUS', 'True'))
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)

        self.response = {'exception': False}
Example #2
def delete_runtime(name, config=None):
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    runtimes = compute_handler.list_runtimes(name)
    for runtime in runtimes:
        compute_handler.delete_runtime(runtime[0], runtime[1])
        runtime_key = compute_handler.get_runtime_key(runtime[0], runtime[1])
        internal_storage.delete_runtime_meta(runtime_key)
Example #3
def clean_all(config=None):
    logger.info('Cleaning all PyWren information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Clean object storage temp dirs
    sh = internal_storage.storage_handler
    runtimes = sh.list_keys(storage_config['bucket'], RUNTIMES_PREFIX)
    if runtimes:
        sh.delete_objects(storage_config['bucket'], runtimes)
    compute_handler.delete_all_runtimes()
    clean_os_bucket(storage_config['bucket'],
                    JOBS_PREFIX,
                    internal_storage,
                    sleep=1)

    # Clean local runtime_meta cache
    if os.path.exists(CACHE_DIR):
        shutil.rmtree(CACHE_DIR)

    # Clean localhost temp dirs
    localhost_jobs_path = os.path.join(TEMP, JOBS_PREFIX)
    if os.path.exists(localhost_jobs_path):
        shutil.rmtree(localhost_jobs_path)
    localhost_runtimes_path = os.path.join(TEMP, RUNTIMES_PREFIX)
    if os.path.exists(localhost_runtimes_path):
        shutil.rmtree(localhost_runtimes_path)
Example #4
def create_runtime(name, memory=None, config=None):
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    memory = config['pywren']['runtime_memory'] if not memory else memory
    timeout = config['pywren']['runtime_timeout']
    logger.info('Creating runtime: {}, memory: {}'.format(name, memory))

    runtime_key = compute_handler.get_runtime_key(name, memory)
    runtime_meta = compute_handler.create_runtime(name, memory, timeout=timeout)

    try:
        internal_storage.put_runtime_meta(runtime_key, runtime_meta)
    except Exception:
        raise("Unable to upload 'preinstalled-modules' file into {}".format(internal_storage.backend))
Example #5
def update_runtime(name, config=None):
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    timeout = config['pywren']['runtime_timeout']
    logger.info('Updating runtime: {}'.format(name))

    runtimes = compute_handler.list_runtimes(name)

    for runtime in runtimes:
        runtime_key = compute_handler.get_runtime_key(runtime[0], runtime[1])
        runtime_meta = compute_handler.create_runtime(runtime[0], runtime[1], timeout)

        try:
            internal_storage.put_runtime_meta(runtime_key, runtime_meta)
        except Exception:
            raise("Unable to upload 'preinstalled-modules' file into {}".format(internal_storage.backend))
Example #6
def clean_runtimes(config=None):
    logger.info('Cleaning all runtimes and cache information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Clean local runtime_meta cache
    if os.path.exists(CACHE_DIR):
        shutil.rmtree(CACHE_DIR)

    sh = internal_storage.storage_handler
    runtimes = sh.list_keys(storage_config['bucket'], 'runtime')
    if runtimes:
        sh.delete_objects(storage_config['bucket'], runtimes)

    compute_handler.delete_all_runtimes()
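
The runtime-management helpers in Examples #2 through #6 follow the same pattern: resolve a configuration with default_config(), then drive a Compute handler together with an InternalStorage instance. Below is a minimal usage sketch, assuming these functions are importable from a pywren_ibm_cloud.runtime module and that a valid PyWren configuration (file or dict) is available; the runtime name is an illustrative placeholder.

# Hypothetical usage of the runtime-management helpers shown above.
# The import path and the runtime name are assumptions for illustration only.
from pywren_ibm_cloud.runtime import (create_runtime, update_runtime,
                                      delete_runtime, clean_runtimes)

runtime_name = 'myaccount/pywren-runtime-v36'  # illustrative Docker image name

# Build and register the runtime with 2048 MB of memory.
create_runtime(runtime_name, memory=2048)

# Regenerate the metadata for every deployed memory size of this runtime.
update_runtime(runtime_name)

# Remove the runtime from the compute backend and delete its cached metadata.
delete_runtime(runtime_name)

# Alternatively, wipe all runtimes and the local runtime_meta cache at once.
clean_runtimes()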
Example #7
    def __init__(self, config=None, runtime=None, runtime_memory=None, compute_backend=None,
                 compute_backend_region=None, storage_backend=None, storage_backend_region=None,
                 workers=None, rabbitmq_monitor=None, remote_invoker=None, log_level=None):

        self.start_time = time.time() 
        self._state = self.State.New
        self.is_pywren_function = is_pywren_function()
        
        # Log level Configuration
        self.log_level = log_level
        if not self.log_level:
            if logger.getEffectiveLevel() != logging.WARNING:
                self.log_level = logging.getLevelName(logger.getEffectiveLevel())
        if self.log_level:
            os.environ["PYWREN_LOGLEVEL"] = self.log_level
            if not self.is_pywren_function:
                default_logging_config(self.log_level)
        
        # Overwrite pywren config parameters
        pw_config_ow = {}
        if runtime is not None:
            pw_config_ow['runtime'] = runtime
        if runtime_memory is not None:
            pw_config_ow['runtime_memory'] = runtime_memory
        if compute_backend is not None:
            pw_config_ow['compute_backend'] = compute_backend
        if compute_backend_region is not None:
            pw_config_ow['compute_backend_region'] = compute_backend_region
        if storage_backend is not None:
            pw_config_ow['storage_backend'] = storage_backend
        if storage_backend_region is not None:
            pw_config_ow['storage_backend_region'] = storage_backend_region
        if workers is not None:
            pw_config_ow['workers'] = workers
        if rabbitmq_monitor is not None:
            pw_config_ow['rabbitmq_monitor'] = rabbitmq_monitor
        if remote_invoker is not None:
            pw_config_ow['remote_invoker'] = remote_invoker

        self.config = default_config(copy.deepcopy(config), pw_config_ow)

        self.executor_id = create_executor_id()
        logger.debug('FunctionExecutor created with ID: {}'.format(self.executor_id))

        self.data_cleaner = self.config['pywren'].get('data_cleaner', True)
        self.rabbitmq_monitor = self.config['pywren'].get('rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_mnonitor since 'amqp_url'"
                                " is not present in configuration")
        
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.invoker = FunctionInvoker(self.config, self.executor_id, self.internal_storage)

        self.futures = []
        self.total_jobs = 0
        self.cleaned_jobs = set()
Example #8
class FunctionExecutor:

    class State:

        New = 'New'
        Running = 'Running'
        Ready = 'Ready'
        Done = 'Done'
        Error = 'Error'
    
    def __init__(self, config=None, runtime=None, runtime_memory=None, compute_backend=None,
                 compute_backend_region=None, storage_backend=None, storage_backend_region=None,
                 workers=None, rabbitmq_monitor=None, remote_invoker=None, log_level=None):

        self.start_time = time.time() 
        self._state = self.State.New
        self.is_pywren_function = is_pywren_function()
        
        # Log level Configuration
        self.log_level = log_level
        if not self.log_level:
            if logger.getEffectiveLevel() != logging.WARNING:
                self.log_level = logging.getLevelName(logger.getEffectiveLevel())
        if self.log_level:
            os.environ["PYWREN_LOGLEVEL"] = self.log_level
            if not self.is_pywren_function:
                default_logging_config(self.log_level)
        
        # Overwrite pywren config parameters
        pw_config_ow = {}
        if runtime is not None:
            pw_config_ow['runtime'] = runtime
        if runtime_memory is not None:
            pw_config_ow['runtime_memory'] = runtime_memory
        if compute_backend is not None:
            pw_config_ow['compute_backend'] = compute_backend
        if compute_backend_region is not None:
            pw_config_ow['compute_backend_region'] = compute_backend_region
        if storage_backend is not None:
            pw_config_ow['storage_backend'] = storage_backend
        if storage_backend_region is not None:
            pw_config_ow['storage_backend_region'] = storage_backend_region
        if workers is not None:
            pw_config_ow['workers'] = workers
        if rabbitmq_monitor is not None:
            pw_config_ow['rabbitmq_monitor'] = rabbitmq_monitor
        if remote_invoker is not None:
            pw_config_ow['remote_invoker'] = remote_invoker

        self.config = default_config(copy.deepcopy(config), pw_config_ow)

        self.executor_id = create_executor_id()
        logger.debug('FunctionExecutor created with ID: {}'.format(self.executor_id))

        self.data_cleaner = self.config['pywren'].get('data_cleaner', True)
        self.rabbitmq_monitor = self.config['pywren'].get('rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_mnonitor since 'amqp_url'"
                                " is not present in configuration")
        
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.invoker = FunctionInvoker(self.config, self.executor_id, self.internal_storage)

        self.futures = []
        self.total_jobs = 0
        self.cleaned_jobs = set()


    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)


    def lidar_call_async(self, func, data, extra_env=None, runtime_memory=None,
                         timeout=EXECUTION_TIMEOUT, include_modules=[], exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional environment variables for action environment. Default None.
        :param runtime_memory: Memory to use to run the function. Default None (loaded from config).
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: future object.
        """
        job_id = self._create_job_id('A')

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run(job)
        self.futures.extend(futures)
        self._state = FunctionExecutor.State.Running

        return futures[0]


    def lidar_map(self, map_function, map_iterdata, extra_params=None, extra_env=None, runtime_memory=None,
                  partition_type=None, rows=1, cols=1, timeout=EXECUTION_TIMEOUT, invoke_pool_threads=500,
                  include_modules=[], exclude_modules=[]):
        """
        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param extra_params: Additional parameters to pass to the function activation. Default None.
        :param extra_env: Additional environment variables for action environment. Default None.
        :param runtime_memory: Memory to use to run the function. Default None (loaded from config).
        :param partition_type: Partitioning strategy used to split each object. Default None.
        :param rows: Number of row partitions to split each object into. Default 1.
        :param cols: Number of column partitions to split each object into. Default 1.
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                                self.executor_id, job_id,
                                map_function=map_function,
                                iterdata=map_iterdata,
                                runtime_meta=runtime_meta,
                                runtime_memory=runtime_memory,
                                partition_type=partition_type,
                                extra_params=extra_params,
                                extra_env=extra_env,
                                obj_rows=rows,
                                obj_cols=cols,
                                invoke_pool_threads=invoke_pool_threads,
                                include_modules=include_modules,
                                exclude_modules=exclude_modules,
                                execution_timeout=timeout)

        futures = self.invoker.run(job)
        self.futures.extend(futures)
        self._state = FunctionExecutor.State.Running
        if len(futures) == 1:
            return futures[0]
        return futures



    def lidar_map_reduce(self, map_function, map_iterdata, reduce_function, extra_params=None, extra_env=None,
                         map_runtime_memory=None, reduce_runtime_memory=None, chunk_size=None, chunk_n=None,
                         timeout=EXECUTION_TIMEOUT, invoke_pool_threads=500, reducer_one_per_object=False,
                         reducer_wait_local=False, include_modules=[], exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        Both stages are executed entirely within the compute backend.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function:  the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_params: Additional parameters to pass to function activation. Default None.
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config).
        :param chunk_size: the size of the data chunks to split each object. 'None' for processing
                           the whole file in one function activation.
        :param chunk_n: Number of chunks to split each object. 'None' for processing the whole
                        file in one function activation.
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id, map_runtime_memory)

        map_job = create_map_job(self.config, self.internal_storage,
                                 self.executor_id, map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_params=extra_params,
                                 extra_env=extra_env,
                                 obj_chunk_size=chunk_size,
                                 obj_chunk_number=chunk_n,
                                 invoke_pool_threads=invoke_pool_threads,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout)

        map_futures = self.invoker.run(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            self.wait(fs=map_futures)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id, reduce_runtime_memory)

        reduce_job = create_reduce_job(self.config, self.internal_storage,
                                       self.executor_id, reduce_job_id,
                                       reduce_function, map_job, map_futures,
                                       runtime_meta=runtime_meta,
                                       reducer_one_per_object=reducer_one_per_object,
                                       runtime_memory=reduce_runtime_memory,
                                       extra_env=extra_env,
                                       include_modules=include_modules,
                                       exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run(reduce_job)

        self.futures.extend(reduce_futures)

        for f in map_futures:
            f.produce_output = False

        self._state = FunctionExecutor.State.Running

        return map_futures + reduce_futures


    def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED, download_results=False,
             timeout=None, THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor instances)
        given by fs to complete. Returns a named 2-tuple of sets. The first set, named done,
        contains the futures that completed (finished or cancelled futures) before the wait
        completed. The second set, named not_done, contains the futures that did not complete
        (pending or running futures). timeout can be used to control the maximum number of
        seconds to wait before returning.

        :param fs: Futures list. Default None
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
        :param download_results: Download results. Default false (Only get statuses)
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: `(fs_done, fs_notdone)`
            where `fs_done` is a list of futures that have completed
            and `fs_notdone` is a list of futures that have not completed.
        :rtype: 2-tuple of list
        """
        futures = self.futures if not fs else fs
        if type(futures) != list:
            futures = [futures]
        if not futures:
            raise Exception('You must run the call_async(), map() or map_reduce(), or provide'
                            ' a list of futures before calling the wait()/get_result() method')

        if download_results:
            msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
        else:
            msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)
        logger.info(msg)
        if not self.log_level and self._state == FunctionExecutor.State.Running:
            print(msg)

        if is_unix_system() and timeout is not None:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(timeout)

        pbar = None
        if not self.is_pywren_function and self._state == FunctionExecutor.State.Running \
           and not self.log_level:
            from tqdm.auto import tqdm

            if download_results:
                total_to_check = len([f for f in futures if not f.done])
            else:
                total_to_check = len([f for f in futures if not (f.ready or f.done)])

            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}', total=total_to_check)  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ', total=total_to_check, disable=False)

        try:
            if self.rabbitmq_monitor:
                logger.info('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures, self.internal_storage, rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results, throw_except=throw_except,
                              pbar=pbar, return_when=return_when, THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures, self.internal_storage, download_results=download_results,
                             throw_except=throw_except, return_when=return_when, pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE, WAIT_DUR_SEC=WAIT_DUR_SEC)

        except FunctionException as e:
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                print()
            msg = None
            logger.info(e.msg)
            if not self.log_level:
                print(e.msg)
            if e.exc_msg:
                logger.info('Exception: ' + e.exc_msg)
                if not self.log_level:
                    print('--> Exception: ' + e.exc_msg)
            else:
                print()
                traceback.print_exception(*e.exception)
            self._state = FunctionExecutor.State.Error

        except TimeoutError:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.ready and not f.done]
            msg = ('ExecutorID {} - Raised timeout of {} seconds waiting for results - Total Activations not done: {}'
                   .format(self.executor_id, timeout, len(not_dones_call_ids)))
            self._state = FunctionExecutor.State.Error

        except KeyboardInterrupt:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            self._state = FunctionExecutor.State.Error

        except Exception as e:
            self.invoker.stop()
            if pbar:
                pbar.close()
                print()
            if not self.is_pywren_function:
                self.clean()
            raise e

        finally:
            self.invoker.stop()
            if is_unix_system():
                signal.alarm(0)
            if pbar:
                pbar.close()
                if not is_notebook():
                    print()
            if self._state == FunctionExecutor.State.Error and msg:
                logger.debug(msg)
                if not self.log_level:
                    print(msg)
            if self.data_cleaner and not self.is_pywren_function:
                self.clean()
                if not fs and self._state == FunctionExecutor.State.Error and is_notebook():
                    del self.futures[len(self.futures)-len(futures):]

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
            self._state = FunctionExecutor.State.Done
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]
            self._state = FunctionExecutor.State.Ready

        return fs_done, fs_notdone

    def get_result(self, fs=None, throw_except=True, timeout=None, THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
        """
        For getting the results from all function activations

        :param fs: Futures list. Default None
        :param throw_except: Reraise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: The result of the future/s
        """
        fs_done, unused_fs_notdone = self.wait(fs=fs, throw_except=throw_except,
                                               timeout=timeout, download_results=True,
                                               THREADPOOL_SIZE=THREADPOOL_SIZE,
                                               WAIT_DUR_SEC=WAIT_DUR_SEC)
        result = []
        for f in fs_done:
            if fs and not f.futures and f.produce_output:
                # Process futures provided by the user
                result.append(f.result(throw_except=throw_except, internal_storage=self.internal_storage))
            elif not fs and not f.futures and f.produce_output and not f.read:
                # Process internally stored futures
                result.append(f.result(throw_except=throw_except, internal_storage=self.internal_storage))
                f.read = True

        logger.debug("ExecutorID {} Finished getting results".format(self.executor_id))

        if result and len(result) == 1:
            return result[0]
        return result
    

    def clean(self, fs=None, local_execution=True):
        """
        Deletes all the files from COS. These files include the function,
        the data serialization and the function invocation results.
        """
        futures = self.futures if not fs else fs
        if type(futures) != list:
            futures = [futures]
        if not futures:
            return

        if not fs:
            present_jobs = {(f.executor_id, f.job_id) for f in futures
                            if (f.done or not f.produce_output)
                            and f.executor_id.count('/') == 1}
        else:
            present_jobs = {(f.executor_id, f.job_id) for f in futures
                            if f.executor_id.count('/') == 1}

        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            msg = "ExecutorID {} - Cleaning temporary data".format(self.executor_id)
            logger.info(msg)
            if not self.log_level:
                print(msg)

        for executor_id, job_id in jobs_to_clean:
            storage_bucket = self.config['pywren']['storage_bucket']
            storage_prefix = '/'.join([JOBS_PREFIX, executor_id, job_id])

            if local_execution:
                # 1st case: Not background. The main code waits until the cleaner finishes its execution.
                # It is not ideal for performance tests, since it can take long time to complete.
                # clean_os_bucket(storage_bucket, storage_prefix, self.internal_storage)

                # 2nd case: Execute in Background as a subprocess. The main program does not wait for its completion.
                storage_config = json.dumps(self.internal_storage.get_storage_config())
                storage_config = storage_config.replace('"', '\\"')

                cmdstr = ('{} -c "from pywren_ibm_cloud.storage.utils import clean_bucket; \
                                  clean_bucket(\'{}\', \'{}\', \'{}\')"'.format(sys.executable,
                                                                                storage_bucket,
                                                                                storage_prerix,
                                                                                storage_config))
                os.popen(cmdstr)
            else:
                extra_env = {'STORE_STATUS': False,
                             'STORE_RESULT': False}
                old_stdout = sys.stdout
                sys.stdout = open(os.devnull, 'w')
                self.call_async(clean_os_bucket, [storage_bucket, storage_prefix], extra_env=extra_env)
                sys.stdout = old_stdout

        self.cleaned_jobs.update(jobs_to_clean)
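
A minimal end-to-end sketch of how the executor above might be driven. The import path, the mapped function and the input data are illustrative assumptions; only the lidar_map(), get_result() and clean() calls come from the class shown above.

# Hypothetical driver for the FunctionExecutor defined above.
from pywren_ibm_cloud import FunctionExecutor   # assumed import path

def double(x):
    # Trivial function to map over the input data.
    return x * 2

if __name__ == '__main__':
    executor = FunctionExecutor(runtime_memory=1024, log_level='INFO')
    futures = executor.lidar_map(double, [1, 2, 3, 4])  # one future per input item
    results = executor.get_result(fs=futures)           # waits for and downloads all results
    print(results)                                      # -> [2, 4, 6, 8]
    executor.clean()                                    # remove temporary objects from storage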
Example #9
class JobRunner:
    def __init__(self, jr_config, jobrunner_conn):
        start_time = time.time()
        self.jr_config = jr_config
        self.jobrunner_conn = jobrunner_conn

        log_level = self.jr_config['log_level']
        cloud_logging_config(log_level)
        self.pywren_config = self.jr_config['pywren_config']
        self.storage_config = extract_storage_config(self.pywren_config)

        self.call_id = self.jr_config['call_id']
        self.job_id = self.jr_config['job_id']
        self.executor_id = self.jr_config['executor_id']
        self.func_key = self.jr_config['func_key']
        self.data_key = self.jr_config['data_key']
        self.data_byte_range = self.jr_config['data_byte_range']
        self.output_key = self.jr_config['output_key']

        self.stats = stats(self.jr_config['stats_filename'])
        self.stats.write('jobrunner_start', start_time)

        self.show_memory = strtobool(
            os.environ.get('SHOW_MEMORY_USAGE', 'False'))

    def _get_function_and_modules(self):
        """
        Gets and unpickles function and modules from storage
        """
        logger.debug("Getting function and modules")
        func_download_time_t1 = time.time()
        func_obj = self.internal_storage.get_func(self.func_key)
        loaded_func_all = pickle.loads(func_obj)
        func_download_time_t2 = time.time()
        self.stats.write(
            'func_download_time',
            round(func_download_time_t2 - func_download_time_t1, 8))
        logger.debug("Finished getting Function and modules")

        return loaded_func_all

    def _save_modules(self, module_data):
        """
        Save modules, before we unpickle actual function
        """
        if module_data:
            logger.debug("Writing Function dependencies to local disk")
            module_path = os.path.join(PYTHON_MODULE_PATH, self.executor_id,
                                       self.job_id, self.call_id)
            # shutil.rmtree(PYTHON_MODULE_PATH, True)  # delete old modules
            os.makedirs(module_path, exist_ok=True)
            sys.path.append(module_path)

            for m_filename, m_data in module_data.items():
                m_path = os.path.dirname(m_filename)

                if len(m_path) > 0 and m_path[0] == "/":
                    m_path = m_path[1:]
                to_make = os.path.join(module_path, m_path)
                try:
                    os.makedirs(to_make)
                except OSError as e:
                    if e.errno == 17:
                        pass
                    else:
                        raise e
                full_filename = os.path.join(to_make,
                                             os.path.basename(m_filename))

                with open(full_filename, 'wb') as fid:
                    fid.write(b64str_to_bytes(m_data))

            #logger.info("Finished writing {} module files".format(len(module_data)))
            #logger.debug(subprocess.check_output("find {}".format(module_path), shell=True))
            #logger.debug(subprocess.check_output("find {}".format(os.getcwd()), shell=True))
            logger.debug("Finished writing Function dependencies")

    def _unpickle_function(self, pickled_func):
        """
        Unpickle function; it will expect modules to be there
        """
        logger.debug("Unpickle Function")
        loaded_func = pickle.loads(pickled_func)
        logger.debug("Finished Function unpickle")

        return loaded_func

    def _load_data(self):
        extra_get_args = {}
        if self.data_byte_range is not None:
            range_str = 'bytes={}-{}'.format(*self.data_byte_range)
            extra_get_args['Range'] = range_str

        logger.debug("Getting function data")
        data_download_time_t1 = time.time()
        data_obj = self.internal_storage.get_data(
            self.data_key, extra_get_args=extra_get_args)
        logger.debug("Finished getting Function data")
        logger.debug("Unpickle Function data")
        loaded_data = pickle.loads(data_obj)
        logger.debug("Finished unpickle Function data")
        data_download_time_t2 = time.time()
        self.stats.write(
            'data_download_time',
            round(data_download_time_t2 - data_download_time_t1, 8))

        return loaded_data

    def _fill_optional_args(self, function, data):
        """
        Fills in the reserved, optional parameters that may be present in the function signature
        """
        func_sig = inspect.signature(function)

        if 'ibm_cos' in func_sig.parameters:
            if 'ibm_cos' in self.pywren_config:
                try:
                    ibm_boto3_client = Storage(self.pywren_config,
                                               'ibm_cos').get_client()
                    data['ibm_cos'] = ibm_boto3_client
                except Exception as e:
                    logger.error('Cannot create the ibm_cos connection: {}'.format(e))
                    data['ibm_cos'] = None
            else:
                logger.error(
                    'Cannot create the ibm_cos connection: Configuration not provided'
                )
                data['ibm_cos'] = None

        if 'internal_storage' in func_sig.parameters:
            data['internal_storage'] = self.internal_storage

        if 'rabbitmq' in func_sig.parameters:
            if 'rabbitmq' in self.pywren_config:
                try:
                    rabbit_amqp_url = self.pywren_config['rabbitmq'].get(
                        'amqp_url')
                    params = pika.URLParameters(rabbit_amqp_url)
                    connection = pika.BlockingConnection(params)
                    data['rabbitmq'] = connection
                except Exception as e:
                    logger.error('Cannot create the rabbitmq connection: {}'.format(e))
                    data['rabbitmq'] = None
            else:
                logger.error(
                    'Cannot create the rabbitmq connection: Configuration not provided'
                )
                data['rabbitmq'] = None

        if 'id' in func_sig.parameters:
            data['id'] = int(self.call_id)

    def _create_data_stream(self, data):
        """
        Creates the data stream in case of object processing
        """
        extra_get_args = {}
        if 'url' in data:
            url = data['url']
            logger.info('Getting dataset from {}'.format(url.path))
            if url.data_byte_range is not None:
                range_str = 'bytes={}-{}'.format(*url.data_byte_range)
                extra_get_args['Range'] = range_str
                logger.info('Chunk: {} - Range: {}'.format(
                    url.part, extra_get_args['Range']))
            resp = requests.get(url.path, headers=extra_get_args, stream=True)
            url.data_stream = resp.raw

        if 'obj' in data:
            obj = data['obj']
            storage_handler = Storage(
                self.pywren_config, obj.storage_backend).get_storage_handler()
            logger.info('Getting dataset from {}://{}/{}'.format(
                obj.storage_backend, obj.bucket, obj.key))
            # logger.info("ob.limit_X_values {}".format(obj.limit_X_values))#####

            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(
                    *obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
                sb = storage_handler.get_object(obj.bucket,
                                                obj.key,
                                                stream=True,
                                                extra_get_args=extra_get_args)
                obj.data_stream = WrappedStreamingBodyPartition(
                    sb, obj.chunk_size, obj.data_byte_range)
            elif obj.data_byte_range is None and (obj.limit_X_values is None or
                                                  obj.limit_Y_values is None):
                obj.data_stream = storage_handler.get_object(obj.bucket,
                                                             obj.key,
                                                             stream=True)
            else:
                obj.data_stream = storage_handler.get_object(obj.bucket,
                                                             obj.key,
                                                             stream=True)
                obj.data_stream = file_part(obj.data_stream, obj)

    def run(self):
        """
        Runs the function
        """
        logger.info("Started")
        result = None
        exception = False
        try:
            self.internal_storage = InternalStorage(self.storage_config)
            self.internal_storage.tmp_obj_prefix = self.output_key.rsplit(
                '/', 1)[0]
            loaded_func_all = self._get_function_and_modules()
            self._save_modules(loaded_func_all['module_data'])
            function = self._unpickle_function(loaded_func_all['func'])
            data = self._load_data()
            logger.info("data_obj {}".format(data))

            if is_object_processing_function(function):
                self._create_data_stream(data)

            self._fill_optional_args(function, data)

            if self.show_memory:
                logger.debug(
                    "Memory usage before call the function: {}".format(
                        get_current_memory_usage()))

            logger.info("Going to execute '{}()'".format(str(
                function.__name__)))
            print('---------------------- FUNCTION LOG ----------------------',
                  flush=True)
            func_exec_time_t1 = time.time()
            result = function(**data)
            func_exec_time_t2 = time.time()
            print('----------------------------------------------------------',
                  flush=True)
            logger.info("Success function execution")

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(
                    get_current_memory_usage()))

            self.stats.write('function_exec_time',
                             round(func_exec_time_t2 - func_exec_time_t1, 8))

            # Check for new futures
            if result is not None:
                self.stats.write("result", True)
                if isinstance(result, ResponseFuture) or \
                   (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                    self.stats.write('new_futures', True)

                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)

                if self.show_memory:
                    logger.debug(
                        "Memory usage after output serialization: {}".format(
                            get_current_memory_usage()))
            else:
                logger.debug("No result to store")
                self.stats.write("result", False)

        except Exception:
            exception = True
            self.stats.write("exception", True)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print('----------------------- EXCEPTION !-----------------------',
                  flush=True)
            traceback.print_exc(file=sys.stdout)
            print('----------------------------------------------------------',
                  flush=True)

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(
                    get_current_memory_usage()))

            try:
                logger.debug("Pickling exception")
                pickled_exc = pickle.dumps(
                    (exc_type, exc_value, exc_traceback))
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

            except Exception as pickle_exception:
                # Shockingly often, modules like subprocess don't properly
                # call the base Exception.__init__, which results in them
                # being unpickleable. As a result, we actually wrap this in a try/catch block
                # and more-carefully handle the exceptions if any part of this save / test-reload
                # fails
                self.stats.write("exc_pickle_fail", True)
                pickled_exc = pickle.dumps({
                    'exc_type': str(exc_type),
                    'exc_value': str(exc_value),
                    'exc_traceback': exc_traceback,
                    'pickle_exception': pickle_exception
                })
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))
        finally:
            store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
            if result is not None and store_result and not exception:
                output_upload_timestamp_t1 = time.time()
                logger.info(
                    "Storing function result - output.pickle - Size: {}".
                    format(sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_timestamp_t2 = time.time()
                self.stats.write(
                    "output_upload_time",
                    round(
                        output_upload_timestamp_t2 -
                        output_upload_timestamp_t1, 8))
            self.jobrunner_conn.send("Finished")
            logger.info("Finished")
Example #10
    def run(self):
        """
        Runs the function
        """
        logger.info("Started")
        result = None
        exception = False
        try:
            self.internal_storage = InternalStorage(self.storage_config)
            self.internal_storage.tmp_obj_prefix = self.output_key.rsplit(
                '/', 1)[0]
            loaded_func_all = self._get_function_and_modules()
            self._save_modules(loaded_func_all['module_data'])
            function = self._unpickle_function(loaded_func_all['func'])
            data = self._load_data()
            logger.info("data_obj {}".format(data))

            if is_object_processing_function(function):
                self._create_data_stream(data)

            self._fill_optional_args(function, data)

            if self.show_memory:
                logger.debug(
                    "Memory usage before call the function: {}".format(
                        get_current_memory_usage()))

            logger.info("Going to execute '{}()'".format(str(
                function.__name__)))
            print('---------------------- FUNCTION LOG ----------------------',
                  flush=True)
            func_exec_time_t1 = time.time()
            result = function(**data)
            func_exec_time_t2 = time.time()
            print('----------------------------------------------------------',
                  flush=True)
            logger.info("Success function execution")

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(
                    get_current_memory_usage()))

            self.stats.write('function_exec_time',
                             round(func_exec_time_t2 - func_exec_time_t1, 8))

            # Check for new futures
            if result is not None:
                self.stats.write("result", True)
                if isinstance(result, ResponseFuture) or \
                   (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                    self.stats.write('new_futures', True)

                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)

                if self.show_memory:
                    logger.debug(
                        "Memory usage after output serialization: {}".format(
                            get_current_memory_usage()))
            else:
                logger.debug("No result to store")
                self.stats.write("result", False)

        except Exception:
            exception = True
            self.stats.write("exception", True)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print('----------------------- EXCEPTION !-----------------------',
                  flush=True)
            traceback.print_exc(file=sys.stdout)
            print('----------------------------------------------------------',
                  flush=True)

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(
                    get_current_memory_usage()))

            try:
                logger.debug("Pickling exception")
                pickled_exc = pickle.dumps(
                    (exc_type, exc_value, exc_traceback))
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

            except Exception as pickle_exception:
                # Shockingly often, modules like subprocess don't properly
                # call the base Exception.__init__, which results in them
                # being unpickleable. As a result, we actually wrap this in a try/catch block
                # and more-carefully handle the exceptions if any part of this save / test-reload
                # fails
                self.stats.write("exc_pickle_fail", True)
                pickled_exc = pickle.dumps({
                    'exc_type': str(exc_type),
                    'exc_value': str(exc_value),
                    'exc_traceback': exc_traceback,
                    'pickle_exception': pickle_exception
                })
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))
        finally:
            store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
            if result is not None and store_result and not exception:
                output_upload_timestamp_t1 = time.time()
                logger.info(
                    "Storing function result - output.pickle - Size: {}".
                    format(sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_timestamp_t2 = time.time()
                self.stats.write(
                    "output_upload_time",
                    round(
                        output_upload_timestamp_t2 -
                        output_upload_timestamp_t1, 8))
            self.jobrunner_conn.send("Finished")
            logger.info("Finished")
Example #11
class CallStatus:
    def __init__(self, pywren_config):
        self.config = pywren_config
        self.rabbitmq_monitor = self.config['pywren'].get(
            'rabbitmq_monitor', False)
        self.store_status = strtobool(os.environ.get('STORE_STATUS', 'True'))
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)

        self.response = {'exception': False}

    def send(self, event_type):
        self.response['type'] = event_type
        if self.store_status:
            if self.rabbitmq_monitor:
                self._send_status_rabbitmq()
            if not self.rabbitmq_monitor or event_type == '__end__':
                self._send_status_os()

    def _send_status_os(self):
        """
        Send the status event to the Object Storage
        """
        executor_id = self.response['executor_id']
        job_id = self.response['job_id']
        call_id = self.response['call_id']

        if self.response['type'] == '__init__':
            init_key = create_init_key(JOBS_PREFIX, executor_id, job_id,
                                       call_id)
            self.internal_storage.put_data(init_key, '')

        elif self.response['type'] == '__end__':
            status_key = create_status_key(JOBS_PREFIX, executor_id, job_id,
                                           call_id)
            dmpd_response_status = json.dumps(self.response)
            drs = sizeof_fmt(len(dmpd_response_status))
            logger.info(
                "Storing execution stats - status.json - Size: {}".format(drs))
            self.internal_storage.put_data(status_key, dmpd_response_status)

    def _send_status_rabbitmq(self):
        """
        Send the status event to RabbitMQ
        """
        dmpd_response_status = json.dumps(self.response)
        drs = sizeof_fmt(len(dmpd_response_status))

        executor_id = self.response['executor_id']
        job_id = self.response['job_id']

        rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
        status_sent = False
        output_query_count = 0
        params = pika.URLParameters(rabbit_amqp_url)
        exchange = 'pywren-{}-{}'.format(executor_id, job_id)

        while not status_sent and output_query_count < 5:
            output_query_count = output_query_count + 1
            try:
                connection = pika.BlockingConnection(params)
                channel = connection.channel()
                channel.exchange_declare(exchange=exchange,
                                         exchange_type='fanout',
                                         auto_delete=True)
                channel.basic_publish(exchange=exchange,
                                      routing_key='',
                                      body=dmpd_response_status)
                connection.close()
                logger.info(
                    "Execution status sent to rabbitmq - Size: {}".format(drs))
                status_sent = True
            except Exception as e:
                logger.error("Unable to send status to rabbitmq")
                logger.error(str(e))
                logger.info('Retrying to send status to rabbitmq...')
                time.sleep(0.2)
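
Before send() is called, the caller is expected to have filled self.response with at least the identifiers that _send_status_os and _send_status_rabbitmq read. A short illustrative sketch of that contract, assuming a pywren_config dict is already available (all identifier values are placeholders):

# Illustrative use of the CallStatus class defined above.
call_status = CallStatus(pywren_config)
call_status.response['executor_id'] = 'a1b2c3-0'
call_status.response['job_id'] = 'M000'
call_status.response['call_id'] = '00000'

call_status.send('__init__')   # marks the activation as started (writes the init key)
# ... the function runs; execution stats are added to call_status.response ...
call_status.send('__end__')    # stores/publishes the final status payload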
Example #12
def function_handler(event):
    start_time = time.time()
    logger.debug("Action handler started")
    response_status = {'exception': False}
    response_status['host_submit_time'] = event['host_submit_time']
    response_status['start_time'] = start_time

    context_dict = {
        'ibm_cf_request_id': os.environ.get("__OW_ACTIVATION_ID"),
        'ibm_cf_python_version': os.environ.get("PYTHON_VERSION"),
    }

    config = event['config']
    storage_config = wrenconfig.extract_storage_config(config)

    log_level = event['log_level']
    ibm_cf_logging_config(log_level)

    call_id = event['call_id']
    callgroup_id = event['callgroup_id']
    executor_id = event['executor_id']
    logger.info("Execution ID: {}/{}/{}".format(executor_id, callgroup_id, call_id))
    job_max_runtime = event.get("job_max_runtime", 590)  # default for CF
    status_key = event['status_key']
    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']
    output_key = event['output_key']
    extra_env = event.get('extra_env', {})

    response_status['call_id'] = call_id
    response_status['callgroup_id'] = callgroup_id
    response_status['executor_id'] = executor_id
    # response_status['func_key'] = func_key
    # response_status['data_key'] = data_key
    # response_status['output_key'] = output_key
    # response_status['status_key'] = status_key

    try:
        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "PyWren version mismatch",
                            version.__version__, event['pywren_version'])

        # response_status['free_disk_bytes'] = free_disk_space("/tmp")

        custom_env = {'PYWREN_CONFIG': json.dumps(config),
                      'PYWREN_EXECUTOR_ID':  executor_id,
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH),
                      'PYTHONUNBUFFERED': 'True'}

        os.environ.update(custom_env)
        os.environ.update(extra_env)

        # pass a full json blob
        jobrunner_config = {'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'python_module_path': PYTHON_MODULE_PATH,
                            'output_key': output_key,
                            'stats_filename': JOBRUNNER_STATS_FILENAME}

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        setup_time = time.time()
        response_status['setup_time'] = round(setup_time - start_time, 8)

        result_queue = multiprocessing.Queue()
        jr = jobrunner(jobrunner_config, result_queue)
        jr.daemon = True
        logger.info("Starting jobrunner process")
        jr.start()
        jr.join(job_max_runtime)
        response_status['exec_time'] = round(time.time() - setup_time, 8)

        if jr.is_alive():
            # If process is still alive after jr.join(job_max_runtime), kill it
            logger.error("Process exceeded maximum runtime of {} seconds".format(job_max_runtime))
            # Send the signal to all the process groups
            jr.terminate()
            raise Exception("OUTATIME",  "Process executed for too long and was killed")

        try:
            # Only 1 message is returned by jobrunner
            result_queue.get(block=False)
        except Exception:
            # If there is no message, the process was killed due to an exception raised
            # while pickling an exception
            raise Exception("EXCPICKLEERROR", "PyWren was unable to pickle the exception, check function logs")

        # print(subprocess.check_output("find {}".format(PYTHON_MODULE_PATH), shell=True))
        # print(subprocess.check_output("find {}".format(os.getcwd()), shell=True))

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                # Each line of the stats file has the form "<key> <value>"
                for line in fid.readlines():
                    key, value = line.strip().split(" ", 1)
                    try:
                        response_status[key] = float(value)
                    except Exception:
                        response_status[key] = value
                    if key in ('exception', 'exc_pickle_fail', 'result'):
                        response_status[key] = eval(value)

        # response_status['server_info'] = get_server_info()
        response_status.update(context_dict)
        response_status['end_time'] = time.time()

    except Exception as e:
        # internal runtime exceptions
        logger.error("There was an exception: {}".format(str(e)))
        response_status['end_time'] = time.time()
        response_status['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        response_status['exc_info'] = str(pickled_exc)

    finally:
        store_status = strtobool(os.environ.get('STORE_STATUS', 'True'))
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        dmpd_response_status = json.dumps(response_status)
        drs = sizeof_fmt(len(dmpd_response_status))

        if rabbit_amqp_url and store_status:
            status_sent = False
            output_query_count = 0
            while not status_sent and output_query_count < 5:
                output_query_count += 1
                try:
                    params = pika.URLParameters(rabbit_amqp_url)
                    connection = pika.BlockingConnection(params)
                    channel = connection.channel()
                    channel.queue_declare(queue=executor_id, auto_delete=True)
                    channel.basic_publish(exchange='', routing_key=executor_id,
                                          body=dmpd_response_status)
                    connection.close()
                    logger.info("Execution stats sent to rabbitmq - Size: {}".format(drs))
                    status_sent = True
                except Exception as e:
                    logger.error("Unable to send status to rabbitmq")
                    logger.error(str(e))
                    logger.info('Retrying to send stats to rabbitmq...')
                    time.sleep(0.2)
        if store_status:
            internal_storage = InternalStorage(storage_config)
            logger.info("Storing execution stats - status.json - Size: {}".format(drs))
            internal_storage.put_data(status_key, dmpd_response_status)
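
For context, a minimal sketch of the event payload this handler consumes, assuming it is the entry point of the serverless action: the key names are exactly the ones read by the handler above, while every value below is an illustrative placeholder rather than a real key layout.

# Hypothetical invocation payload for the handler above; all values are placeholders.
event = {
    'config': pywren_config,                 # full PyWren config dict (assumed to be available)
    'log_level': 'INFO',
    'pywren_version': '1.0.0',               # must match the runtime version or WRONGVERSION is raised
    'executor_id': 'a6f2c3/0',
    'callgroup_id': '00000',
    'call_id': '00000',
    'host_submit_time': time.time(),
    'func_key': '<bucket-prefix>/func.pickle',
    'data_key': '<bucket-prefix>/data.pickle',
    'data_byte_range': None,                 # or (first_byte, last_byte) within the shared data object
    'output_key': '<bucket-prefix>/output.pickle',
    'status_key': '<bucket-prefix>/status.json',
    'extra_env': {},                         # optional, merged into os.environ
    'job_max_runtime': 590,                  # optional, seconds before the jobrunner is terminated
}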
Example #13
0
class jobrunner(Process):

    def __init__(self, jr_config, result_queue):
        super().__init__()
        start_time = time.time()
        self.config = jr_config
        log_level = self.config['log_level']
        self.result_queue = result_queue
        ibm_cf_logging_config(log_level)
        self.stats = stats(self.config['stats_filename'])
        self.stats.write('jobrunner_start', start_time)
        pw_config = json.loads(os.environ.get('PYWREN_CONFIG'))
        self.storage_config = extract_storage_config(pw_config)

        if 'SHOW_MEMORY_USAGE' in os.environ:
            self.show_memory = eval(os.environ['SHOW_MEMORY_USAGE'])
        else:
            self.show_memory = False

        self.func_key = self.config['func_key']
        self.data_key = self.config['data_key']
        self.data_byte_range = self.config['data_byte_range']
        self.output_key = self.config['output_key']

    def _get_function_and_modules(self):
        """
        Gets and unpickles function and modules from storage
        """
        logger.debug("Getting function and modules")
        func_download_time_t1 = time.time()
        func_obj = self.internal_storage.get_func(self.func_key)
        loaded_func_all = pickle.loads(func_obj)
        func_download_time_t2 = time.time()
        self.stats.write('func_download_time', round(func_download_time_t2-func_download_time_t1, 8))
        logger.debug("Finished getting Function and modules")

        return loaded_func_all

    def _save_modules(self, module_data):
        """
        Save modules, before we unpickle actual function
        """
        logger.debug("Writing Function dependencies to local disk")
        PYTHON_MODULE_PATH = self.config['python_module_path']
        shutil.rmtree(PYTHON_MODULE_PATH, True)  # delete old modules
        os.mkdir(PYTHON_MODULE_PATH)
        sys.path.append(PYTHON_MODULE_PATH)

        for m_filename, m_data in module_data.items():
            m_path = os.path.dirname(m_filename)

            if len(m_path) > 0 and m_path[0] == "/":
                m_path = m_path[1:]
            to_make = os.path.join(PYTHON_MODULE_PATH, m_path)
            try:
                os.makedirs(to_make)
            except OSError as e:
                if e.errno == 17:  # EEXIST: directory already exists
                    pass
                else:
                    raise e
            full_filename = os.path.join(to_make, os.path.basename(m_filename))

            with open(full_filename, 'wb') as fid:
                fid.write(b64str_to_bytes(m_data))

        #logger.info("Finished writing {} module files".format(len(loaded_func_all['module_data'])))
        #logger.debug(subprocess.check_output("find {}".format(PYTHON_MODULE_PATH), shell=True))
        #logger.debug(subprocess.check_output("find {}".format(os.getcwd()), shell=True))
        logger.debug("Finished writing Function dependencies")

    def _unpickle_function(self, pickled_func):
        """
        Unpickle function; it will expect modules to be there
        """
        logger.debug("Unpickle Function")
        loaded_func = pickle.loads(pickled_func)
        logger.debug("Finished Function unpickle")

        return loaded_func

    def _load_data(self):
        extra_get_args = {}
        if self.data_byte_range is not None:
            range_str = 'bytes={}-{}'.format(*self.data_byte_range)
            extra_get_args['Range'] = range_str

        logger.debug("Getting function data")
        data_download_time_t1 = time.time()
        data_obj = self.internal_storage.get_data(self.data_key, extra_get_args=extra_get_args)
        logger.debug("Finished getting Function data")
        logger.debug("Unpickle Function data")
        loaded_data = pickle.loads(data_obj)
        logger.debug("Finished unpickle Function data")
        data_download_time_t2 = time.time()
        self.stats.write('data_download_time', round(data_download_time_t2-data_download_time_t1, 8))

        return loaded_data

    def _create_storage_clients(self, function, data):
        # Verify storage parameters - Create clients
        func_sig = inspect.signature(function)

        if 'ibm_cos' in func_sig.parameters:
            ibm_boto3_client = IbmCosStorageBackend(self.storage_config['ibm_cos']).get_client()
            data['ibm_cos'] = ibm_boto3_client

        if 'swift' in func_sig.parameters:
            swift_client = SwiftStorageBackend(self.storage_config['swift'])
            data['swift'] = swift_client

        if 'internal_storage' in func_sig.parameters:
            data['internal_storage'] = self.internal_storage

        return data

    def run(self):
        """
        Runs the function
        """
        logger.info("Started")
        # initial output file in case job fails
        result = None
        exception = False
        try:
            self.internal_storage = InternalStorage(self.storage_config)

            loaded_func_all = self._get_function_and_modules()
            self._save_modules(loaded_func_all['module_data'])
            function = self._unpickle_function(loaded_func_all['func'])
            data = self._load_data()
            data = self._create_storage_clients(function, data)

            if self.show_memory:
                logger.debug("Memory usage before call the function: {}".format(get_current_memory_usage()))

            logger.info("Function: Going to execute '{}()'".format(str(function.__name__)))
            print('---------------------- FUNCTION LOG ----------------------', flush=True)
            func_exec_time_t1 = time.time()
            result = function(**data)
            func_exec_time_t2 = time.time()
            print('----------------------------------------------------------', flush=True)
            logger.info("Function: Success execution")

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

            self.stats.write('function_exec_time', round(func_exec_time_t2-func_exec_time_t1, 8))

            # Check for new futures
            if result is not None:
                self.stats.write("result", True)
                if isinstance(result, ResponseFuture):
                    callgroup_id = result.callgroup_id
                    self.stats.write('new_futures', '{}/{}'.format(callgroup_id, 1))
                elif type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture):
                    callgroup_id = result[0].callgroup_id
                    self.stats.write('new_futures', '{}/{}'.format(callgroup_id, len(result)))
                else:
                    self.stats.write('new_futures', '{}/{}'.format(None, 0))

                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)

                if self.show_memory:
                    logger.debug("Memory usage after output serialization: {}".format(get_current_memory_usage()))
            else:
                logger.debug("No result to store")
                self.stats.write("result", False)

        except Exception as e:
            exception = True
            self.stats.write("exception", True)
            print('----------------------- EXCEPTION! -----------------------', flush=True)
            logger.error("There was an exception: {}".format(str(e)))
            print('----------------------------------------------------------', flush=True)

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

            try:
                logger.debug("Pickling exception")
                pickled_exc = pickle.dumps(sys.exc_info())
                pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

            except Exception as pickle_exception:
                # Shockingly often, modules like subprocess don't properly
                # call the base Exception.__init__, which results in them
                # being unpickleable. As a result, we actually wrap this in a try/catch block
                # and more-carefully handle the exceptions if any part of this save / test-reload
                # fails
                logger.debug("Failed pickling exception: {}".format(str(pickle_exception)))
                self.stats.write("exc_pickle_fail", True)
                exc_type, exc_value, exc_traceback = sys.exc_info()
                pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                            'exc_value': str(exc_value),
                                            'exc_traceback': exc_traceback,
                                            'pickle_exception': pickle_exception})
                pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))
        finally:
            store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
            if result is not None and store_result and not exception:
                output_upload_timestamp_t1 = time.time()
                logger.info("Storing function result - output.pickle - Size: {}".format(sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_timestamp_t2 = time.time()
                self.stats.write("output_upload_time", round(output_upload_timestamp_t2 - output_upload_timestamp_t1, 8))
            self.result_queue.put("Finished")
            logger.info("Finished")
Example #14
0
    def result(self, throw_except=True, internal_storage=None):
        """
        Return the value returned by the call.
        If the call raised an exception, this method will raise the same exception.
        If the future is cancelled before completing then CancelledError will be raised.

        :param throw_except: Reraise exception if call raised. Default true.
        :param internal_storage: Storage handler to poll cloud storage. Default None.
        :return: Result of the call.
        :raises CancelledError: If the job is cancelled before completed.
        :raises TimeoutError: If job is not complete after `timeout` seconds.
        """
        if self._state == ResponseFuture.State.New:
            raise ValueError("task not yet invoked")

        if self._state == ResponseFuture.State.Success:
            return self._return_val

        if self._state == ResponseFuture.State.Futures:
            return self._new_futures

        if self._state == ResponseFuture.State.Error:
            if throw_except:
                reraise(*self._exception)
            else:
                raise FunctionException(self.executor_id, self.job_id,
                                        self.activation_id, self._exception)

        if internal_storage is None:
            internal_storage = InternalStorage(
                storage_config=self.storage_config)

        self.status(throw_except=throw_except,
                    internal_storage=internal_storage)

        if not self.produce_output:
            self._set_state(ResponseFuture.State.Success)

        if self._state == ResponseFuture.State.Success:
            return self._return_val

        if self._state == ResponseFuture.State.Futures:
            return self._new_futures

        call_output_time = time.time()
        call_output = internal_storage.get_call_output(self.executor_id,
                                                       self.job_id,
                                                       self.call_id)
        self.output_query_count += 1

        while call_output is None and self.output_query_count < self.GET_RESULT_MAX_RETRIES:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_output = internal_storage.get_call_output(
                self.executor_id, self.job_id, self.call_id)
            self.output_query_count += 1

        if call_output is None:
            if throw_except:
                raise Exception('Unable to get the output from call {} - '
                                'Activation ID: {}'.format(
                                    self.call_id, self.activation_id))
            else:
                self._set_state(ResponseFuture.State.Error)
                return None

        call_output = pickle.loads(call_output)
        call_output_time_done = time.time()
        self._call_output = call_output

        self._call_metadata[
            'download_output_time'] = call_output_time_done - call_output_time
        self._call_metadata['output_query_count'] = self.output_query_count
        self._call_metadata[
            'download_output_timestamp'] = call_output_time_done

        log_msg = (
            'ExecutorID {} | JobID {} - Got output from call {} - Activation '
            'ID: {}'.format(self.executor_id, self.job_id, self.call_id,
                            self.activation_id))
        logger.info(log_msg)

        function_result = call_output['result']

        if isinstance(function_result, ResponseFuture) or \
           (type(function_result) == list and len(function_result) > 0 and isinstance(function_result[0], ResponseFuture)):
            self._new_futures = [
                function_result
            ] if type(function_result) == ResponseFuture else function_result
            self._set_state(ResponseFuture.State.Futures)
            self._call_metadata['status_done_timestamp'] = self._call_metadata[
                'download_output_timestamp']
            del self._call_metadata['download_output_timestamp']
            return self._new_futures

        else:
            self._return_val = function_result
            self._set_state(ResponseFuture.State.Success)
            return self._return_val
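
A small usage sketch for this method, assuming `futures` is a list of ResponseFuture objects from an earlier invocation and `storage_config` is available; passing a shared InternalStorage handler avoids re-creating a storage client for every future.

# Hypothetical usage; `futures` and `storage_config` are assumed to exist.
internal_storage = InternalStorage(storage_config)
results = [f.result(internal_storage=internal_storage) for f in futures]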
Example #15
0
    def status(self, throw_except=True, internal_storage=None):
        """
        Return the status returned by the call.
        If the call raised an exception, this method will raise the same exception.
        If the future is cancelled before completing then CancelledError will be raised.

        :param throw_except: Reraise exception if call raised. Default true.
        :param internal_storage: Storage handler to poll cloud storage. Default None.
        :return: Status of the call.
        :raises CancelledError: If the job is cancelled before completed.
        :raises TimeoutError: If job is not complete after `timeout` seconds.
        """
        if self._state == ResponseFuture.State.New:
            raise ValueError("task not yet invoked")

        if self._state in [
                ResponseFuture.State.Ready, ResponseFuture.State.Success
        ]:
            return self._call_status

        if internal_storage is None:
            internal_storage = InternalStorage(self.storage_config)

        if self._call_status is None:
            check_storage_path(internal_storage.get_storage_config(),
                               self.storage_path)
            self._call_status = internal_storage.get_call_status(
                self.executor_id, self.job_id, self.call_id)
            self.status_query_count += 1

            while self._call_status is None:
                time.sleep(self.GET_RESULT_SLEEP_SECS)
                self._call_status = internal_storage.get_call_status(
                    self.executor_id, self.job_id, self.call_id)
                self.status_query_count += 1

        self.activation_id = self._call_status['activation_id']

        if self._call_status['type'] == '__init__':
            self._set_state(ResponseFuture.State.Running)
            return self._call_status

        self._call_metadata['host_submit_time'] = self._call_status[
            'host_submit_time']
        self._call_metadata['status_done_timestamp'] = time.time()
        self._call_metadata['status_query_count'] = self.status_query_count

        total_time = format(
            round(
                self._call_status['end_time'] -
                self._call_status['start_time'], 2), '.2f')

        if self._call_status['exception']:
            # the action handler/jobrunner/function had an exception
            self._set_state(ResponseFuture.State.Error)
            # exc_info was stored as str(pickled_bytes); eval() recovers the bytes object
            self._exception = pickle.loads(eval(self._call_status['exc_info']))
            msg = None

            if not self._call_status.get('exc_pickle_fail', False):
                exception_args = self._exception[1].args
                if exception_args and exception_args[0] == "WRONGVERSION":
                    msg = "PyWren version mismatch: remote library is version {}, local " \
                          "library is version {}".format(exception_args[2], exception_args[3])

                elif exception_args and exception_args[0] == "OUTATIME":
                    msg = "Process ran out of time and was killed"

                elif exception_args and exception_args[0] == "OUTOFMEMORY":
                    msg = "Process exceeded maximum memory and was killed"
            else:
                fault = Exception(self._exception['exc_value'])
                self._exception = (Exception, fault,
                                   self._exception['exc_traceback'])

            if throw_except:
                reraise(*self._exception)
            raise FunctionException(self.executor_id, self.job_id,
                                    self.activation_id, self._exception, msg)

        log_msg = (
            'ExecutorID {} | JobID {} - Got status from call {} - Activation '
            'ID: {} - Time: {} seconds'.format(self.executor_id, self.job_id,
                                               self.call_id,
                                               self.activation_id,
                                               str(total_time)))
        logger.info(log_msg)
        self._set_state(ResponseFuture.State.Ready)

        if not self._call_status['result']:
            self._set_state(ResponseFuture.State.Success)
            self.produce_output = False

        if 'new_futures' in self._call_status:
            self.result(throw_except=throw_except,
                        internal_storage=internal_storage)

        return self._call_status
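
A sketch of inspecting a single future with this method, assuming `future` is a ResponseFuture and `storage_config` is available; note that status() blocks until a status record appears in storage, and a record of type '__init__' only marks the call as Running.

# Hypothetical usage; `future` and `storage_config` are assumed to exist.
internal_storage = InternalStorage(storage_config)
call_status = future.status(internal_storage=internal_storage)

if call_status.get('type') == '__init__':
    pass    # only the start record has arrived; the call is still running
else:
    total_time = call_status['end_time'] - call_status['start_time']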