예제 #1
0
def load_config(config_data=None):
    """
    Complete and validate the configuration for the GCP backend, in place.

    Fills in defaults for the 'cloudbutton' section, validates the
    'gcp' section, stores the extracted storage configuration under
    config_data['gcp']['storage'] and derives the 'gcp_functions' section
    from 'gcp'. The dict is modified in place and nothing is returned.

    :param config_data: configuration dict. ``None`` is treated as an empty
        configuration, and a missing 'cloudbutton' section as an empty one.
    :raises Exception: if the runtime memory is not one of
        RUNTIME_MEMORY_OPTIONS, if the 'gcp' section or any of its mandatory
        keys is missing, or if 'credentials_path' is not an existing file.
    :raises KeyError: if no region is configured anywhere.
    """
    if config_data is None:
        config_data = {}

    # A missing 'cloudbutton' section is treated as empty so defaults apply
    # instead of failing with a KeyError.
    cb_config = config_data.setdefault('cloudbutton', {})
    cb_config.setdefault('runtime_memory', RUNTIME_MEMORY_DEFAULT)
    cb_config.setdefault('runtime_timeout', RUNTIME_TIMEOUT_DEFAULT)
    if 'runtime' not in cb_config:
        cb_config['runtime'] = 'python' + version_str(sys.version_info)
    cb_config.setdefault('workers', MAX_CONCURRENT_WORKERS)

    if cb_config['runtime_memory'] not in RUNTIME_MEMORY_OPTIONS:
        raise Exception('{} MB runtime is not available (Only {} MB)'.format(
            cb_config['runtime_memory'], RUNTIME_MEMORY_OPTIONS))

    # Clamp memory and timeout to the configured upper bounds
    if cb_config['runtime_memory'] > RUNTIME_MEMORY_MAX:
        cb_config['runtime_memory'] = RUNTIME_MEMORY_MAX
    if cb_config['runtime_timeout'] > RUNTIME_TIMEOUT_DEFAULT:
        cb_config['runtime_timeout'] = RUNTIME_TIMEOUT_DEFAULT

    if 'gcp' not in config_data:
        raise Exception("'gcp' section is mandatory in the configuration")

    gcp_config = config_data['gcp']
    gcp_config['retries'] = RETRIES
    gcp_config['retry_sleeps'] = RETRY_SLEEPS

    # Put storage data into compute backend config dict entry
    storage_config = {
        'cloudbutton': cb_config.copy(),
        'gcp_storage': gcp_config.copy(),
    }
    gcp_config['storage'] = cloudbutton_config.extract_storage_config(
        storage_config)

    required_parameters_0 = ('project_name', 'service_account',
                             'credentials_path')
    if not set(required_parameters_0) <= set(gcp_config):
        raise Exception(
            "'project_name', 'service_account' and 'credentials_path' "
            "are mandatory under 'gcp' section")

    # isfile() is already False for a path that does not exist
    credentials_path = gcp_config['credentials_path']
    if not isfile(credentials_path):
        raise Exception("Path {} must be credentials JSON file.".format(
            credentials_path))

    config_data['gcp_functions'] = gcp_config.copy()
    if 'region' not in config_data['gcp_functions']:
        # NOTE(review): the fallback region used to be read only from a
        # 'pywren' section, which nothing else in this function uses. That
        # section is still honoured first when present; otherwise the
        # 'cloudbutton' section is used. Confirm 'pywren' can be dropped.
        legacy_config = config_data.get('pywren', {})
        if 'compute_backend_region' in legacy_config:
            region = legacy_config['compute_backend_region']
        else:
            region = cb_config['compute_backend_region']
        config_data['gcp_functions']['region'] = region
예제 #2
0
def delete_runtime(name, config=None):
    """
    Delete the deployed runtimes listed for *name* and their stored metadata.

    :param name: runtime name passed to the compute handler's list_runtimes.
    :param config: optional configuration, completed through default_config.
    """
    config = default_config(config)
    internal_storage = InternalStorage(extract_storage_config(config))
    compute_handler = Compute(extract_compute_config(config))

    for entry in compute_handler.list_runtimes(name):
        # Each listed entry carries the runtime name and memory in its
        # first two positions
        rt_name, rt_memory = entry[0], entry[1]
        compute_handler.delete_runtime(rt_name, rt_memory)
        meta_key = compute_handler.get_runtime_key(rt_name, rt_memory)
        internal_storage.delete_runtime_meta(meta_key)
예제 #3
0
    def __init__(self, config, num_invokers, log_level):
        """
        Set up storage, compute handlers, queues and the job monitor.

        One compute handler is created per region when the backend's
        'region' setting is a list; otherwise a single handler is created.
        For the 'localhost' backend the handler is cached in the module-level
        CBH mapping and reused while the number of workers is unchanged.

        :param config: full configuration dict. The 'cloudbutton' section is
            read here, and the 'rabbitmq' section when rabbitmq_monitor is on.
        :param num_invokers: stored on the instance.
        :param log_level: stored on the instance.
        """
        global CBH

        self.config = config
        self.num_invokers = num_invokers
        self.log_level = log_level
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        compute_config = extract_compute_config(self.config)

        cloudbutton_config = self.config['cloudbutton']
        self.remote_invoker = cloudbutton_config.get('remote_invoker', False)
        self.rabbitmq_monitor = cloudbutton_config.get('rabbitmq_monitor', False)
        if self.rabbitmq_monitor:
            self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')

        self.num_workers = cloudbutton_config.get('workers')
        logger.debug('Total workers: {}'.format(self.num_workers))

        self.compute_handlers = []
        cb = compute_config['backend']
        regions = compute_config[cb].get('region')
        if regions and isinstance(regions, list):
            for region in regions:
                # dict.copy() is shallow: the nested backend section must be
                # copied as well, otherwise every per-region config would
                # share one dict and end up with the last region.
                new_compute_config = compute_config.copy()
                new_compute_config[cb] = dict(compute_config[cb], region=region)
                self.compute_handlers.append(Compute(new_compute_config))
        elif cb == 'localhost':
            # Drop the cached handler when the number of workers changed
            if cb in CBH and CBH[cb].compute_handler.num_workers != self.num_workers:
                del CBH[cb]
            if cb in CBH:
                logger.info('{} compute handler already started'.format(cb))
                compute_handler = CBH[cb]
            else:
                logger.info('Starting {} compute handler'.format(cb))
                compute_handler = Compute(compute_config)
                CBH[cb] = compute_handler
            self.compute_handlers.append(compute_handler)
        else:
            self.compute_handlers.append(Compute(compute_config))

        self.token_bucket_q = Queue()
        self.pending_calls_q = Queue()

        self.job_monitor = JobMonitor(self.config, self.internal_storage, self.token_bucket_q)
예제 #4
0
def run_tests(test_to_run, config=None):
    """
    Run the TestPywren test suite, or a single test from it.

    Sets the module globals CONFIG, STORAGE_CONFIG and STORAGE before
    building the suite.

    :param test_to_run: 'all' to run every test in TestPywren, otherwise the
        name of one test method.
    :param config: optional open file object containing the JSON
        configuration (assumed to be what the command-line parser provides;
        confirm against the caller). When falsy, default_config() is used.
    """
    global CONFIG, STORAGE_CONFIG, STORAGE

    # Read from the parameter itself rather than from a module-level `args`,
    # so the function also works when called from other code.
    CONFIG = json.load(config) if config else default_config()
    STORAGE_CONFIG = extract_storage_config(CONFIG)
    STORAGE = InternalStorage(STORAGE_CONFIG).storage_handler

    suite = unittest.TestSuite()
    if test_to_run == 'all':
        # unittest.makeSuite is deprecated and removed in Python 3.13
        suite.addTest(
            unittest.defaultTestLoader.loadTestsFromTestCase(TestPywren))
    else:
        try:
            suite.addTest(TestPywren(test_to_run))
        except ValueError:
            # TestCase raises ValueError when the method name does not exist
            print("unknown test, use: --help")
            sys.exit()

    runner = unittest.TextTestRunner()
    runner.run(suite)
예제 #5
0
def clean_all(config=None):
    """
    Remove local temp folders, deployed runtimes, bucket data and the cache.

    :param config: optional configuration, completed through default_config.
    """
    logger.info('Cleaning all Cloudbutton information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_handler = Compute(extract_compute_config(config))

    # Local executor temp dirs
    for local_dir in (STORAGE_FOLDER, DOCKER_FOLDER):
        shutil.rmtree(local_dir, ignore_errors=True)

    # Deployed runtimes, then the runtime and job prefixes in the bucket
    compute_handler.delete_all_runtimes()
    storage_handler = internal_storage.storage_handler
    bucket = storage_config['bucket']
    for prefix in (RUNTIMES_PREFIX, JOBS_PREFIX):
        clean_bucket(storage_handler, bucket, prefix, sleep=1)

    # Local cloudbutton cache
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
예제 #6
0
def create_runtime(name, memory=None, config=None):
    """
    Create a runtime in the compute backend and store its metadata.

    :param name: runtime name.
    :param memory: runtime memory; when falsy, the 'runtime_memory' value
        from the 'cloudbutton' config section is used.
    :param config: optional configuration, completed through default_config.
    :raises Exception: if the runtime metadata cannot be stored; the
        underlying error is chained as the cause.
    """
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    if not memory:
        memory = config['cloudbutton']['runtime_memory']
    timeout = config['cloudbutton']['runtime_timeout']

    logger.info('Creating runtime: {}, memory: {}'.format(name, memory))

    runtime_key = compute_handler.get_runtime_key(name, memory)
    runtime_meta = compute_handler.create_runtime(name,
                                                  memory,
                                                  timeout=timeout)

    try:
        internal_storage.put_runtime_meta(runtime_key, runtime_meta)
    except Exception as e:
        # Raising a bare string is itself a TypeError; raise a real exception
        # that carries the message and keeps the original error as its cause.
        raise Exception(
            "Unable to upload 'preinstalled-modules' file into {}".format(
                internal_storage.backend)) from e
예제 #7
0
    def __init__(self, config, executor_id, internal_storage):
        """
        Set up compute handlers, queues and the job monitor for an executor.

        One compute handler is created per region when the backend's
        'region' setting is a list; otherwise a single handler is created.

        :param config: full configuration dict; the 'cloudbutton' section is
            read here.
        :param executor_id: executor identifier, used in log messages.
        :param internal_storage: storage object stored on the instance and
            handed to the job monitor.
        """
        self.log_level = os.getenv('CLOUDBUTTON_LOGLEVEL')
        self.config = config
        self.executor_id = executor_id
        self.storage_config = extract_storage_config(self.config)
        self.internal_storage = internal_storage
        self.compute_config = extract_compute_config(self.config)
        self.is_cloudbutton_function = is_cloudbutton_function()
        self.invokers = []

        self.remote_invoker = self.config['cloudbutton'].get(
            'remote_invoker', False)
        self.workers = self.config['cloudbutton'].get('workers')
        logger.debug('ExecutorID {} - Total available workers: {}'.format(
            self.executor_id, self.workers))

        self.compute_handlers = []
        cb = self.compute_config['backend']
        regions = self.compute_config[cb].get('region')
        if regions and isinstance(regions, list):
            for region in regions:
                # dict.copy() is shallow: the nested backend section must be
                # copied as well, otherwise every per-region config (and
                # self.compute_config) would share one dict and end up with
                # the last region.
                compute_config = self.compute_config.copy()
                compute_config[cb] = dict(self.compute_config[cb],
                                          region=region)
                self.compute_handlers.append(Compute(compute_config))
        else:
            self.compute_handlers.append(Compute(self.compute_config))

        logger.debug('ExecutorID {} - Creating function invoker'.format(
            self.executor_id))

        self.token_bucket_q = Queue()
        self.pending_calls_q = Queue()
        self.running_flag = Value('i', 0)
        self.ongoing_activations = 0

        self.job_monitor = JobMonitor(self.config, self.internal_storage,
                                      self.token_bucket_q)
예제 #8
0
def update_runtime(name, config=None):
    """
    Re-create the runtimes listed for *name* and refresh their metadata.

    :param name: runtime name passed to the compute handler's list_runtimes.
    :param config: optional configuration, completed through default_config.
    :raises Exception: if the metadata of a runtime cannot be stored; the
        underlying error is chained as the cause.
    """
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    timeout = config['cloudbutton']['runtime_timeout']
    logger.info('Updating runtime: {}'.format(name))

    for runtime in compute_handler.list_runtimes(name):
        # Each listed entry carries the runtime name and memory in its
        # first two positions
        runtime_name, runtime_memory = runtime[0], runtime[1]
        runtime_key = compute_handler.get_runtime_key(runtime_name,
                                                      runtime_memory)
        runtime_meta = compute_handler.create_runtime(runtime_name,
                                                      runtime_memory,
                                                      timeout)

        try:
            internal_storage.put_runtime_meta(runtime_key, runtime_meta)
        except Exception as e:
            # Raising a bare string is itself a TypeError; raise a real
            # exception that carries the message and keeps the original
            # error as its cause.
            raise Exception(
                "Unable to upload 'preinstalled-modules' file into {}".format(
                    internal_storage.backend)) from e
예제 #9
0
def function_handler(event):
    """
    Run the function call described by *event* and report its status.

    The handler configures logging and environment variables from the event,
    starts a JobRunner (as a thread or a process) and waits for it up to
    event['execution_timeout'] seconds. It then merges the call's stats file,
    if it exists, into the status response. Exceptions raised in the main
    body are caught and recorded in the response instead of propagating. An
    '__end__' status is always sent from the ``finally`` clause.

    :param event: dict describing the call. Keys read here: 'log_level',
        'extra_env' (optional), 'config', 'call_id', 'job_id', 'executor_id',
        'runtime_name', 'runtime_memory', 'execution_timeout', 'func_key',
        'data_key', 'data_byte_range', 'host_submit_tstamp' and
        'cloudbutton_version'.
    """
    start_tstamp = time.time()

    log_level = event['log_level']
    cloud_logging_config(log_level)
    logger.debug("Action handler started")

    # Caller-provided environment variables; removed again in `finally`
    extra_env = event.get('extra_env', {})
    os.environ.update(extra_env)

    os.environ.update({'CLOUDBUTTON_FUNCTION': 'True',
                       'PYTHONUNBUFFERED': 'True'})

    config = event['config']
    call_id = event['call_id']
    job_id = event['job_id']
    executor_id = event['executor_id']
    exec_id = "{}/{}/{}".format(executor_id, job_id, call_id)
    logger.info("Execution-ID: {}".format(exec_id))

    runtime_name = event['runtime_name']
    runtime_memory = event['runtime_memory']
    execution_timeout = event['execution_timeout']
    logger.debug("Runtime name: {}".format(runtime_name))
    logger.debug("Runtime memory: {}MB".format(runtime_memory))
    logger.debug("Function timeout: {}s".format(execution_timeout))

    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    # Status object whose `response` dict is filled in below and sent at
    # the '__init__' and '__end__' points
    call_status = CallStatus(config, internal_storage)
    call_status.response['host_submit_tstamp'] = event['host_submit_tstamp']
    call_status.response['start_tstamp'] = start_tstamp
    context_dict = {
        'cloudbutton_version': os.environ.get("CLOUDBUTTON_VERSION"),
        'call_id': call_id,
        'job_id': job_id,
        'executor_id': executor_id,
        'activation_id': os.environ.get('__PW_ACTIVATION_ID')
    }
    call_status.response.update(context_dict)

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    try:
        # Refuse to run when the host and this runtime use different versions
        if version.__version__ != event['cloudbutton_version']:
            msg = ("Cloudbutton version mismatch. Host version: {} - Runtime version: {}"
                   .format(event['cloudbutton_version'], version.__version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        # call_status.response['free_disk_bytes'] = free_disk_space("/tmp")
        custom_env = {'CLOUDBUTTON_CONFIG': json.dumps(config),
                      'CLOUDBUTTON_EXECUTION_ID': exec_id,
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), LIBS_PATH)}
        os.environ.update(custom_env)

        # Per-call local directory holding the stats file read back below
        jobrunner_stats_dir = os.path.join(STORAGE_FOLDER,
                                           storage_config['bucket'],
                                           JOBS_PREFIX, executor_id,
                                           job_id, call_id)
        os.makedirs(jobrunner_stats_dir, exist_ok=True)
        jobrunner_stats_filename = os.path.join(jobrunner_stats_dir, 'jobrunner.stats.txt')

        jobrunner_config = {'cloudbutton_config': config,
                            'call_id':  call_id,
                            'job_id':  job_id,
                            'executor_id':  executor_id,
                            'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'output_key': create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
                            'stats_filename': jobrunner_stats_filename}

        if show_memory_peak:
            # NOTE(review): 'STOP' is only sent to this monitor on the
            # success path further down; if a timeout or another exception
            # is raised first, the thread is never told to stop - confirm
            # memory_monitor_worker terminates on its own in that case.
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker, args=(mm_conn, ))
            memory_monitor.start()

        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(jobrunner_config, jobrunner_conn, internal_storage)
        logger.debug('Starting JobRunner process')
        # Run the job in a thread when __PW_LOCAL_EXECUTION is set to a true
        # value, otherwise in a separate process
        local_execution = strtobool(os.environ.get('__PW_LOCAL_EXECUTION', 'False'))
        jrp = Thread(target=jobrunner.run) if local_execution else Process(target=jobrunner.run)
        jrp.start()

        # Wait at most execution_timeout seconds for the job to finish
        jrp.join(execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # If the job is still alive after jrp.join(execution_timeout), kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            # Stop the monitor and read back the peak value it reports
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error('No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by jobrunner when it finishes.
            # If no message, this means that the jobrunner process was killed.
            # 99% of times the jobrunner is killed due an OOM, so we assume here an OOM.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        # Merge the "<key> <value>" lines of the stats file into the response;
        # values are stored as floats when they parse as such, else as text
        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    # NOTE(review): eval() executes whatever text the stats
                    # file contains for these keys; this is only safe while
                    # the file is fully trusted.
                    if key in ['exception', 'exc_pickle_fail', 'result', 'new_futures']:
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------', flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------', flush=True)
        call_status.response['exception'] = True

        # Record the pickled exception info (as its str() form) in the response
        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        # Always report the end of the call, whether it succeeded or failed
        call_status.response['end_tstamp'] = time.time()
        call_status.send('__end__')

        # Remove the caller-provided variables added at the start
        for key in extra_env:
            os.environ.pop(key)

        logger.info("Finished")
예제 #10
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 runtime_memory=None,
                 compute_backend=None,
                 compute_backend_region=None,
                 storage_backend=None,
                 storage_backend_region=None,
                 workers=None,
                 rabbitmq_monitor=None,
                 remote_invoker=None,
                 log_level=None):
        """
        Initialize a FunctionExecutor class.

        Every keyword argument other than ``config`` and ``log_level`` that
        is not None is passed to default_config as an override.

        :param config: Settings passed in here will override those in config file. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_memory: memory to use in the runtime (converted with int()). Default None.
        :param compute_backend: Name of the compute backend to use. Default None.
        :param compute_backend_region: Name of the compute backend region to use. Default None.
        :param storage_backend: Name of the storage backend to use. Default None.
        :param storage_backend_region: Name of the storage backend region to use. Default None.
        :param workers: Max number of concurrent workers.
        :param rabbitmq_monitor: use rabbitmq as the monitoring system. Default None.
        :param remote_invoker: value for the 'remote_invoker' setting. Default None.
        :param log_level: log level to use during the execution. Default None.

        :raises Exception: if rabbitmq_monitor is enabled but the
            configuration has no 'amqp_url' under 'rabbitmq'.

        :return `FunctionExecutor` object.
        """
        self.is_cloudbutton_function = is_cloudbutton_function()

        # Log level Configuration: when none is given, inherit the logger's
        # effective level unless it is WARNING
        self.log_level = log_level
        if not self.log_level:
            if (logger.getEffectiveLevel() != logging.WARNING):
                self.log_level = logging.getLevelName(
                    logger.getEffectiveLevel())
        if self.log_level:
            os.environ["CLOUDBUTTON_LOGLEVEL"] = self.log_level
            if not self.is_cloudbutton_function:
                default_logging_config(self.log_level)

        # Overwrite pywren config parameters: only arguments that were
        # actually provided (not None) become overrides
        candidate_overrides = {
            'runtime': runtime,
            'runtime_memory': (None if runtime_memory is None
                               else int(runtime_memory)),
            'compute_backend': compute_backend,
            'compute_backend_region': compute_backend_region,
            'storage_backend': storage_backend,
            'storage_backend_region': storage_backend_region,
            'workers': workers,
            'rabbitmq_monitor': rabbitmq_monitor,
            'remote_invoker': remote_invoker,
        }
        pw_config_ow = {key: value
                        for key, value in candidate_overrides.items()
                        if value is not None}

        # Deep-copy so the caller's config dict is never modified
        self.config = default_config(copy.deepcopy(config), pw_config_ow)

        self.executor_id = create_executor_id()
        logger.debug('FunctionExecutor created with ID: {}'.format(
            self.executor_id))

        self.data_cleaner = self.config['cloudbutton'].get(
            'data_cleaner', True)
        self.rabbitmq_monitor = self.config['cloudbutton'].get(
            'rabbitmq_monitor', False)

        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config[
                    'rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception(
                    "You cannot use rabbitmq_monitor since 'amqp_url'"
                    " is not present in configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.invoker = FunctionInvoker(self.config, self.executor_id,
                                       self.internal_storage)

        self.futures = []
        self.total_jobs = 0
        self.cleaned_jobs = set()
        self.last_call = None