Code example #1
0
def delete_runtime(image_name, config=None):
    """
    Delete every deployed Cloud Functions action belonging to the given
    runtime image, together with its runtime metadata in internal storage.

    :param image_name: Docker image name of the runtime, or 'default'.
    :param config: Optional PyWren settings overriding the default config.
    """
    logger.info('Deleting runtime: {}'.format(image_name))

    config = wrenconfig.default() if config is None else wrenconfig.default(config)

    storage_config = wrenconfig.extract_storage_config(config)
    storage_client = storage.InternalStorage(storage_config)
    cf_client = CloudFunctions(wrenconfig.extract_cf_config(config))

    if image_name == 'default':
        image_name = _get_default_image_name()

    target_action_name = create_action_name(image_name)
    region = cf_client.endpoint.split('//')[1].split('.')[0]
    namespace = cf_client.namespace

    # Deployed action names look like '<formatted_image_name>-<memory>MB';
    # delete each one that matches the requested image.
    for deployed in cf_client.list_actions(PACKAGE):
        base_name, mem_str = deployed['name'].rsplit('-', 1)
        if base_name != target_action_name:
            continue
        memory = int(mem_str.replace('MB', ''))
        runtime_name = create_runtime_name(image_name, memory)
        storage_client.delete_runtime_info(region, namespace, runtime_name)
        cf_client.delete_action(create_action_name(runtime_name))
Code example #2
0
def clean_bucket(bucket, prefix, storage_config):
    """
    Wrapper of clean_os_bucket(). Use this method only when storage_config is
    in JSON format. In any other case, call directly clean_os_bucket() method.

    :param bucket: bucket name to clean.
    :param prefix: key prefix of the objects to delete.
    :param storage_config: storage settings as a JSON string.
    """
    internal_storage = storage.InternalStorage(json.loads(storage_config))
    # Silence clean_os_bucket()'s output. The previous code leaked the
    # devnull file handle and left stdout redirected permanently if
    # clean_os_bucket() raised; restore/close in a finally block instead.
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        clean_os_bucket(bucket, prefix, internal_storage)
    finally:
        sys.stdout = sys.__stdout__
        devnull.close()
Code example #3
0
File: wren.py  Project: chen116/pywren-ibm-cloud
    def __init__(self,
                 config=None,
                 runtime=None,
                 log_level=None,
                 runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param log_level: Logging level configured through wrenlogging. Default None.
        :param runtime_timeout: Max time per action. Default 600
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        # Load the default config, merging in any user-supplied overrides
        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        # A runtime given here overrides the configured CF action name
        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']
        self.cf_cluster = ibm_cf_config['is_cf_cluster']
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Invocation retry policy, taken from the 'pywren' config section
        retry_config = {}
        retry_config['invocation_retry'] = self.config['pywren'][
            'invocation_retry']
        retry_config['retry_sleeps'] = self.config['pywren']['retry_sleeps']
        retry_config['retries'] = self.config['pywren']['retries']

        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config,
                                                    retry_config)

        # Internal storage client plus the Executor that does the real work
        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.internal_storage,
                                 runtime_timeout)
        self.executor_id = self.executor.executor_id

        self.futures = []
        self.reduce_future = None
Code example #4
0
def extract_modules(image_name, config=None, pywren_location=None):
    """
    Extract the list of Python modules installed in a docker image and
    store it into the internal storage as runtime metadata.

    :param image_name: docker image name in 'username/appname[:tag]' format.
    :param config: optional PyWren settings overriding the default config.
    :param pywren_location: optional path to the pywren sources; when None,
        'extract_modules.py' is expected in the current directory.
    """
    # Derive the runtime name from the image name ('user/app:tag' -> 'app_tag').
    # The username part is not needed (previously bound to an unused local).
    _, appname = image_name.split('/')
    runtime_name = appname.replace(':', '_')

    # Load PyWren config from ~/.pywren_config, merging any overrides
    if config is None:
        config = wrenconfig.default()
    else:
        config = wrenconfig.default(config)

    # Create storage_handler to upload modules file
    storage_config = wrenconfig.extract_storage_config(config)
    internal_storage = storage.InternalStorage(storage_config)

    if pywren_location is None:
        action_location = "extract_modules.py"
    else:
        action_location = os.path.join(pywren_location, "runtime",
                                       "extract_modules.py")

    with open(action_location, "r") as action_py:
        action_code = action_py.read()

    # Deploy a throw-away action inside the target image, invoke it to get
    # the modules list, persist the result, then delete the action.
    cf_client = CloudFunctions(config['ibm_cf'])
    action_name = runtime_name + '_modules'
    cf_client.create_action(action_name,
                            code=action_code,
                            kind='blackbox',
                            image=image_name,
                            is_binary=False)
    runtime_meta = cf_client.invoke_with_result(action_name)
    internal_storage.put_runtime_info(runtime_name, runtime_meta)
    cf_client.delete_action(action_name)
Code example #5
0
def _extract_modules(image_name, memory, cf_client, config):
    """
    Extract the list of Python modules installed in a docker image and
    store it as runtime metadata in the internal storage.

    :param image_name: docker image name of the runtime.
    :param memory: action memory in MB; the client default is used when falsy.
    :param cf_client: CloudFunctions client used to deploy/invoke/delete
        the helper action.
    :param config: PyWren settings dict.
    """
    # Create storage_handler to upload the extracted modules list
    storage_config = wrenconfig.extract_storage_config(config)
    internal_storage = storage.InternalStorage(storage_config)

    pywren_location = _get_pywren_location()
    action_location = os.path.join(pywren_location, "runtime",
                                   "extract_modules.py")

    with open(action_location, "r") as action_py:
        action_code = action_py.read()

    modules_action_name = '{}-modules'.format(create_action_name(image_name))

    logger.debug(
        "Creating action for extracting Python modules list: {}".format(
            modules_action_name))
    cf_client.create_action(modules_action_name,
                            image_name,
                            code=action_code,
                            is_binary=False)

    region = cf_client.endpoint.split('//')[1].split('.')[0]
    namespace = cf_client.namespace
    memory = cf_client.default_runtime_memory if not memory else memory
    runtime_name = create_runtime_name(image_name, memory)
    logger.debug(
        "Going to extract Python modules list from: {}".format(image_name))
    # Invoke the throw-away action, persist its result, then clean it up
    runtime_meta = cf_client.invoke_with_result(modules_action_name)
    internal_storage.put_runtime_info(region, namespace, runtime_name,
                                      runtime_meta)
    cf_client.delete_action(modules_action_name)
Code example #6
0
    def run(self):
        """
        Execute the user function and upload its pickled result.

        The output dict starts as a failure value so that, whatever happens,
        something sensible is uploaded from the ``finally`` block. Writes
        timing and 'new_futures' entries through ``self.stats`` and signals
        completion through ``self.result_queue``.
        """
        logger.info("Started")
        # initial output file in case job fails
        output_dict = {'result': None,
                       'success': False}
        pickled_output = pickle.dumps(output_dict)

        try:
            self.internal_storage = storage.InternalStorage(self.storage_config)

            # Download and unpickle the function, restore its module files,
            # then load the input data and inject storage clients into it
            loaded_func_all = self._get_function_and_modules()
            self._save_modules(loaded_func_all['module_data'])
            function = self._unpickle_function(loaded_func_all['func'])
            data = self._load_data()
            data = self._create_storage_clients(function, data)

            if self.show_memory:
                logger.debug("Memory usage before call the function: {}".format(get_current_memory_usage()))

            logger.info("Function: Going to execute '{}()'".format(str(function.__name__)))
            print('------------------- FUNCTION LOG -------------------', flush=True)
            func_exec_time_t1 = time.time()
            result = function(**data)
            func_exec_time_t2 = time.time()
            print('----------------------------------------------------', flush=True)
            logger.info("Function: Success execution")

            if self.show_memory:
                logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

            self.stats.write('function_exec_time', round(func_exec_time_t2-func_exec_time_t1, 8))
            output_dict = {'result': result,
                           'success': True}
            pickled_output = pickle.dumps(output_dict)

            # Check for new futures: the function may itself have invoked
            # more functions and returned their future(s); record how many
            # so the client knows to wait for them
            if isinstance(result, ResponseFuture):
                callgroup_id = result.callgroup_id
                self.stats.write('new_futures', '{}/{}'.format(callgroup_id, 1))
            elif type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture):
                callgroup_id = result[0].callgroup_id
                self.stats.write('new_futures', '{}/{}'.format(callgroup_id, len(result)))
            else:
                self.stats.write('new_futures', '{}/{}'.format(None, 0))

            if self.show_memory:
                logger.debug("Memory usage after output serialization: {}".format(get_current_memory_usage()))

        except Exception as e:
            print('------------------ EXCEPTION -------------------------')
            exc_type, exc_value, exc_traceback = sys.exc_info()
            #traceback.print_tb(exc_traceback)

            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in a try/catch block
            # and more-carefully handle the exceptions if any part of this save / test-reload
            # fails
            logger.error("There was an exception: {}".format(str(e)))

            try:
                pickled_output = pickle.dumps({'result': e,
                                               'exc_type': exc_type,
                                               'exc_value': exc_value,
                                               'exc_traceback': exc_traceback,
                                               'sys.path': sys.path,
                                               'success': False})

                # this is just to make sure they can be unpickled
                pickle.loads(pickled_output)

            except Exception as pickle_exception:
                # Fall back to a string-only representation that is always
                # picklable; 'pickle_fail' tells the client what happened
                pickled_output = pickle.dumps({'result': str(e),
                                               'exc_type': str(exc_type),
                                               'exc_value': str(exc_value),
                                               'exc_traceback': exc_traceback,
                                               'exc_traceback_str': str(exc_traceback),
                                               'sys.path': sys.path,
                                               'pickle_fail': True,
                                               'pickle_exception': pickle_exception,
                                               'success': False})
        finally:
            store_result = True
            # NOTE(review): eval() on an environment variable is unsafe in
            # general; presumably STORE_RESULT is only ever 'True'/'False'
            # set by the platform itself -- confirm before keeping eval here
            if 'STORE_RESULT' in os.environ:
                store_result = eval(os.environ['STORE_RESULT'])

            if store_result:
                output_upload_timestamp_t1 = time.time()
                logger.info("Storing {} - Size: {}".format(self.output_key, sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_timestamp_t2 = time.time()
                self.stats.write("output_upload_time", round(output_upload_timestamp_t2 - output_upload_timestamp_t1, 8))
            self.result_queue.put("Finished")
            logger.info("Finished")
Code example #7
0
    def result(self,
               check_only=False,
               throw_except=True,
               internal_storage=None):
        """
        Return the value returned by the call.
        If the call raised an exception, this method will raise the same exception
        If the future is cancelled before completing then CancelledError will be raised.

        :param check_only: Return None immediately if job is not complete. Default False.
        :param throw_except: Reraise exception if call raised. Default true.
        :param internal_storage: Storage handler to poll cloud storage. Default None.
        :return: Result of the call.
        :raises CancelledError: If the job is cancelled before completed.
        :raises TimeoutError: If job is not complete after `timeout` seconds.
        """
        if self._state == JobState.new:
            raise ValueError("job not yet invoked")

        if internal_storage is None:
            internal_storage = storage.InternalStorage(self.storage_config)

        # status() drives the state machine; afterwards the state tells us
        # whether a result / new futures / exception is already available
        self.status(check_only, throw_except, internal_storage)

        if self._state == JobState.success:
            return self._return_val

        if self._state == JobState.futures:
            return self._new_futures

        if self._state == JobState.error:
            if throw_except:
                raise FunctionException(self.executor_id, self.activation_id,
                                        self._exception)
            else:
                return None

        # Function finished but declared no output to download
        if not self._produce_output:
            return

        # Poll storage for the serialized output, retrying up to
        # GET_RESULT_MAX_RETRIES times with GET_RESULT_SLEEP_SECS pauses
        call_output_time = time.time()
        call_invoker_result = internal_storage.get_call_output(
            self.executor_id, self.callgroup_id, self.call_id)
        self.output_query_count += 1

        while call_invoker_result is None and self.output_query_count < self.GET_RESULT_MAX_RETRIES:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_invoker_result = internal_storage.get_call_output(
                self.executor_id, self.callgroup_id, self.call_id)
            self.output_query_count += 1

        if call_invoker_result is None:
            if throw_except:
                raise Exception(
                    'Unable to get the output of the function - Activation ID: {}'
                    .format(self.activation_id))
            else:
                self._set_state(JobState.error)
                return None

        call_invoker_result = pickle.loads(call_invoker_result)
        call_output_time_done = time.time()
        self._call_invoker_result = call_invoker_result

        # Record download statistics
        self.invoke_status[
            'download_output_time'] = call_output_time_done - call_output_time
        self.invoke_status['output_query_count'] = self.output_query_count
        self.invoke_status['download_output_timestamp'] = call_output_time_done

        log_msg = ('Executor ID {} Got output from Function {} - Activation '
                   'ID: {}'.format(self.executor_id, self.call_id,
                                   self.activation_id))
        logger.debug(log_msg)

        function_result = call_invoker_result['result']

        # The function may have spawned new invocations and returned their
        # future(s); expose them and mark this future accordingly
        if isinstance(function_result, ResponseFuture):
            self._new_futures = [function_result]
            self._set_state(JobState.futures)
            self.invoke_status['status_done_timestamp'] = self.invoke_status[
                'download_output_timestamp']
            del self.invoke_status['download_output_timestamp']
            return self._new_futures

        elif type(function_result
                  ) == list and len(function_result) > 0 and isinstance(
                      function_result[0], ResponseFuture):
            self._new_futures = function_result
            self._set_state(JobState.futures)
            self.invoke_status['status_done_timestamp'] = self.invoke_status[
                'download_output_timestamp']
            del self.invoke_status['download_output_timestamp']
            return self._new_futures

        else:
            self._return_val = function_result
            self._set_state(JobState.success)
            return self._return_val
Code example #8
0
    def status(self,
               check_only=False,
               throw_except=True,
               internal_storage=None):
        """
        Return the status returned by the call.
        If the call raised an exception, this method will raise the same exception
        If the future is cancelled before completing then CancelledError will be raised.

        :param check_only: Return None immediately if job is not complete. Default False.
        :param throw_except: Reraise exception if call raised. Default true.
        :param internal_storage: Storage handler to poll cloud storage. Default None.
        :return: Result of the call.
        :raises CancelledError: If the job is cancelled before completed.
        :raises TimeoutError: If job is not complete after `timeout` seconds.
        """
        if self._state == JobState.new:
            raise ValueError("job not yet invoked")

        # Status already retrieved on a previous call
        if self._state == JobState.ready or self._state == JobState.success:
            return self.run_status

        if internal_storage is None:
            internal_storage = storage.InternalStorage(self.storage_config)

        storage_utils.check_storage_path(internal_storage.get_storage_config(),
                                         self.storage_path)
        call_status = internal_storage.get_call_status(self.executor_id,
                                                       self.callgroup_id,
                                                       self.call_id)
        self.status_query_count += 1

        # In check-only mode, do not block waiting for the status object
        if check_only is True:
            if call_status is None:
                return None

        # Poll storage until the remote status object appears
        while call_status is None:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_status = internal_storage.get_call_status(
                self.executor_id, self.callgroup_id, self.call_id)
            self.status_query_count += 1

        self.invoke_status['status_done_timestamp'] = time.time()
        self.invoke_status['status_query_count'] = self.status_query_count

        self.run_status = call_status  # this is the remote status information

        total_time = format(
            round(call_status['end_time'] - call_status['start_time'], 2),
            '.2f')

        if call_status['exception']:
            # the action handler/jobrunner/function had an exception
            self._set_state(JobState.error)
            # NOTE(review): eval() of 'exc_info' assumes the status object is
            # trusted (written by our own action handler) -- confirm
            self._exception = pickle.loads(eval(call_status['exc_info']))
            msg = None

            if not call_status.get('exc_pickle_fail', False):
                exception_args = self._exception[1].args

                # Translate well-known handler error codes into messages
                if exception_args[0] == "WRONGVERSION":
                    msg = "PyWren version mismatch: remote expected version {}, local" \
                          "library is version {}".format(exception_args[2], exception_args[3])

                if exception_args[0] == "OUTATIME":
                    msg = "Process ran out of time"

                if exception_args[0] == "OUTOFMEMORY":
                    msg = "Process exceeded maximum memory and was killed"
            else:
                # The original exception could not be pickled remotely;
                # rebuild a generic one from its string representation
                fault = Exception(self._exception['exc_value'])
                self._exception = (Exception, fault,
                                   self._exception['exc_traceback'])

            if throw_except:
                raise FunctionException(self.executor_id, self.activation_id,
                                        self._exception, msg)
            return None

        log_msg = ('Executor ID {} Response from Function {} - Activation '
                   'ID: {} - Time: {} seconds'.format(self.executor_id,
                                                      self.call_id,
                                                      self.activation_id,
                                                      str(total_time)))
        logger.debug(log_msg)
        self._set_state(JobState.ready)
        if not call_status['result']:
            # Function does not produced output
            self._set_state(JobState.success)

        # If the function spawned new invocations, eagerly call result() so
        # the new futures get collected
        if 'new_futures' in call_status:
            unused_callgroup_id, total_new_futures = call_status[
                'new_futures'].split('/')
            if int(total_new_futures) > 0:
                self.result(throw_except=throw_except,
                            internal_storage=internal_storage)

        return self.run_status
Code example #9
0
    def result(self, check_only=False, throw_except=True, internal_storage=None):
        """
        Return the value returned by the call.
        If the call raised an exception, this method will raise the same exception
        If the future is cancelled before completing then CancelledError will be raised.

        :param check_only: Return None immediately if job is not complete. Default False.
        :param throw_except: Reraise exception if call raised. Default true.
        :param internal_storage: Storage handler to poll cloud storage. Default None.
        :return: Result of the call.
        :raises CancelledError: If the job is cancelled before completed.
        :raises TimeoutError: If job is not complete after `timeout` seconds.
        """
        if self._state == JobState.new:
            raise ValueError("job not yet invoked")

        # Results from a previous call are cached on the future
        if self._state == JobState.success:
            return self._return_val

        if self._state == JobState.futures:
            return self._new_futures

        if self._state == JobState.error:
            if throw_except:
                raise self._exception
            else:
                return None

        if internal_storage is None:
            internal_storage = storage.InternalStorage(self.storage_config)

        storage_utils.check_storage_path(internal_storage.get_storage_config(), self.storage_path)
        call_status = internal_storage.get_call_status(self.executor_id, self.callgroup_id, self.call_id)
        self.status_query_count += 1

        # In check-only mode, do not block waiting for the status object
        if check_only is True:
            if call_status is None:
                return None

        # Poll storage until the remote status object appears
        while call_status is None:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_status = internal_storage.get_call_status(self.executor_id, self.callgroup_id, self.call_id)
            self.status_query_count += 1

        self._invoke_metadata['status_done_timestamp'] = time.time()
        self._invoke_metadata['status_query_count'] = self.status_query_count

        self.run_status = call_status  # this is the remote status information
        self.invoke_status = self._invoke_metadata  # local status information

        total_time = format(round(call_status['end_time'] - call_status['start_time'], 2), '.2f')

        if call_status['exception'] is not None:
            # the wrenhandler had an exception
            self._set_state(JobState.error)
            exception_str = call_status['exception']
            exception_args = call_status['exception_args']

            log_msg = ('Executor ID {} Error in {} {} - Time: {} '
                       'seconds- Result: {}'.format(self.executor_id,
                                                    self.call_id,
                                                    self.activation_id,
                                                    str(total_time),
                                                    exception_args[0]+" "+exception_args[1]))
            logger.debug(log_msg)

            # Translate well-known handler error codes into exceptions
            if exception_args[0] == "WRONGVERSION":
                if throw_except:
                    raise Exception("Pywren version mismatch: remote "
                                    "expected version {}, local library is version {}".format(
                                     exception_args[2], exception_args[3]))
                return None
            elif exception_args[0] == "OUTATIME":
                if throw_except:
                    raise Exception("Process ran out of time - {} - {}".format(self.call_id,
                                                                               self.activation_id))
                return None
            elif exception_args[0] == "OUTOFMEMORY":
                if throw_except:
                    raise Exception("Process exceeded maximum memory and was "
                                    "killed - {} - {}".format(self.call_id, self.activation_id))
                return None
            else:
                # NOTE(review): if 'exception_traceback' is missing from
                # call_status, self._exception is never assigned here and
                # the raise below would fail with AttributeError -- confirm
                if 'exception_traceback' in call_status:
                    self._exception = Exception(exception_str, *exception_args)
                if throw_except:
                    raise self._exception
                return None

        # Status OK: download the pickled output, retrying up to
        # GET_RESULT_MAX_RETRIES times with GET_RESULT_SLEEP_SECS pauses
        call_output_time = time.time()
        call_invoker_result = internal_storage.get_call_output(self.executor_id, self.callgroup_id, self.call_id)
        self.output_query_count += 1

        while call_invoker_result is None and self.output_query_count < self.GET_RESULT_MAX_RETRIES:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_invoker_result = internal_storage.get_call_output(self.executor_id, self.callgroup_id, self.call_id)
            self.output_query_count += 1

        if call_invoker_result is None:
            if throw_except:
                raise Exception('Unable to get the output of the function - Activation ID: {}'.format(self.activation_id))
            else:
                self._set_state(JobState.error)
                return None

        call_invoker_result = pickle.loads(call_invoker_result)
        call_output_time_done = time.time()
        self._call_invoker_result = call_invoker_result

        # Record download statistics
        self._invoke_metadata['download_output_time'] = call_output_time_done - call_output_time
        self._invoke_metadata['output_query_count'] = self.output_query_count
        self._invoke_metadata['download_output_timestamp'] = call_output_time_done
        call_success = call_invoker_result['success']
        self.invoke_status = self._invoke_metadata  # local status information

        if call_success:
            log_msg = ('Executor ID {} Response from Function {} - Activation '
           'ID: {} - Time: {} seconds'.format(self.executor_id,
                                              self.call_id,
                                              self.activation_id,
                                              str(total_time)))
            logger.debug(log_msg)

            function_result = call_invoker_result['result']

            # The function may have spawned new invocations and returned
            # their future(s); expose them and mark this future accordingly
            if isinstance(function_result, ResponseFuture):
                self._new_futures = [function_result]
                self._set_state(JobState.futures)
                return self._new_futures

            elif type(function_result) == list and len(function_result) > 0 \
                 and isinstance(function_result[0], ResponseFuture):
                self._new_futures = function_result
                self._set_state(JobState.futures)
                return self._new_futures

            else:
                self._return_val = function_result
                self._set_state(JobState.success)
                return self._return_val

        elif throw_except:
            # Re-raise the user function's exception with its remote traceback
            self._exception = call_invoker_result['result']
            self._traceback = (call_invoker_result['exc_type'],
                               call_invoker_result['exc_value'],
                               call_invoker_result['exc_traceback'])

            self._set_state(JobState.error)
            if call_invoker_result.get('pickle_fail', False):
                # Remote side could not pickle the real exception; rebuild a
                # generic one from its string representation
                fault = Exception(call_invoker_result['exc_value'])
                reraise(Exception, fault, call_invoker_result['exc_traceback'])
            else:
                reraise(*self._traceback)
        else:
            self._set_state(JobState.error)
            return None  # nothing, don't raise, no value
Code example #10
0
def ibm_cloud_function_handler(event):
    """
    Entry point executed inside an IBM Cloud Functions action.

    Writes the PyWren/storage configuration into the process environment,
    runs the jobrunner script in a subprocess (bounded by the job's maximum
    runtime), collects its stats file, and finally stores the execution
    status JSON in the internal storage.

    :param event: dict with the invocation payload (config, object keys,
                  call/executor ids, optional extra_env and runtime limit).
    """
    start_time = time.time()
    logger.info("Starting handler")
    response_status = {'exception': None}
    response_status['start_time'] = start_time

    context_dict = {
        'ibm_cf_request_id': os.environ.get("__OW_ACTIVATION_ID"),
        'ibm_cf_hostname': os.environ.get("HOSTNAME"),
        'ibm_cf_python_version': os.environ.get("PYTHON_VERSION"),
    }

    config = event['config']
    storage_config = wrenconfig.extract_storage_config(config)
    custom_handler_env = {'PYWREN_CONFIG': json.dumps(config),
                          'STORAGE_CONFIG': json.dumps(storage_config),
                          'PYWREN_EXECUTOR_ID':  event['executor_id']}
    os.environ.update(custom_handler_env)

    # Pre-initialize so the finally clause below can always reference it,
    # even if reading one of the event keys inside the try block raises
    # (previously this caused a NameError inside finally).
    extra_env = {}

    try:
        status_key = event['status_key']
        func_key = event['func_key']
        data_key = event['data_key']
        data_byte_range = event['data_byte_range']
        output_key = event['output_key']

        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "Pywren version mismatch",
                            version.__version__, event['pywren_version'])

        job_max_runtime = event.get("job_max_runtime", 550)  # default for CF

        response_status['func_key'] = func_key
        response_status['data_key'] = data_key
        response_status['output_key'] = output_key
        response_status['status_key'] = status_key

        extra_env = event.get('extra_env', {})
        extra_env['PYTHONPATH'] = "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH)
        extra_env['PYTHONUNBUFFERED'] = 'True'

        call_id = event['call_id']
        callgroup_id = event['callgroup_id']
        executor_id = event['executor_id']
        response_status['call_id'] = call_id
        response_status['callgroup_id'] = callgroup_id
        response_status['executor_id'] = executor_id

        # pass a full json blob
        jobrunner_config = {'func_key': func_key,
                            'data_key': data_key,
                            'data_byte_range': data_byte_range,
                            'python_module_path': PYTHON_MODULE_PATH,
                            'output_key': output_key,
                            'stats_filename': JOBRUNNER_STATS_FILENAME}

        with open(JOBRUNNER_CONFIG_FILENAME, 'w') as jobrunner_fid:
            json.dump(jobrunner_config, jobrunner_fid)

        # Remove any stats file left over by a previous (warm) invocation.
        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        cmdstr = "python {} {}".format(JOBRUNNER_PATH, JOBRUNNER_CONFIG_FILENAME)

        logger.info("About to execute '{}'".format(cmdstr))
        setup_time = time.time()
        response_status['setup_time'] = setup_time - start_time

        local_env = os.environ.copy()
        local_env.update(extra_env)

        # This is copied from http://stackoverflow.com/a/17698359/4577954
        # reasons for setting process group: http://stackoverflow.com/a/4791612
        process = subprocess.Popen(cmdstr, shell=True, env=local_env, bufsize=1,
                                   stdout=subprocess.PIPE, preexec_fn=os.setsid,
                                   universal_newlines=True)

        logger.info("launched process")

        def consume_stdout(stdout, queue):
            # Mirror the jobrunner's output and keep a copy so the last line
            # can be inspected after the process exits.
            with stdout:
                for line in stdout:
                    print(line, end='')
                    queue.put(line)

        q = Queue()

        t = Thread(target=consume_stdout, args=(process.stdout, q))
        t.daemon = True
        t.start()
        # Wait at most job_max_runtime seconds for the jobrunner to finish.
        t.join(job_max_runtime)

        # Thread.isAlive() was removed in Python 3.9; is_alive() is the
        # supported spelling.
        if t.is_alive():
            # If process is still alive after t.join(job_max_runtime), kill it
            logger.error("Process exceeded maximum runtime of {} sec".format(job_max_runtime))
            # Send the signal to all the process groups
            os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            raise Exception("OUTATIME",  "Process executed for too long and was killed")

        if not q.empty():
            # The jobrunner prints 'Jobrunner finished' as its last line; if
            # that marker is missing, the process died (assumed OOM-killed).
            if 'Jobrunner finished' not in q.queue[q.qsize()-1].strip():
                raise Exception("OUTOFMEMORY",  "Process exceeded maximum memory and was killed")

        logger.info("Command execution finished")

        # Fold the jobrunner's "<key> <value>" stats lines into the status.
        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ")
                    float_value = float(value)
                    response_status[key] = float_value

        response_status['exec_time'] = time.time() - setup_time
        response_status['host_submit_time'] = event['host_submit_time']
        response_status.update(context_dict)
        response_status['end_time'] = time.time()

    except Exception as e:
        # internal runtime exceptions
        logger.error("There was an exception: {}".format(str(e)))
        response_status['end_time'] = time.time()
        response_status['exception'] = str(e)
        response_status['exception_args'] = e.args
        response_status['exception_traceback'] = traceback.format_exc()

    finally:
        store_status = True
        if 'STORE_STATUS' in extra_env:
            # NOTE(review): eval() on an environment-provided string is
            # risky; the value is expected to be 'True'/'False'.
            store_status = eval(extra_env['STORE_STATUS'])

        if store_status:
            internal_storage = storage.InternalStorage(storage_config)
            internal_storage.put_data(status_key, json.dumps(response_status))
コード例 #11
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 runtime_memory=None,
                 log_level=None,
                 rabbitmq_monitor=False):
        """
        Create a new executor instance.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_memory: memory to use in the runtime
        :param log_level: log level to use during the execution
        :param rabbitmq_monitor: use rabbitmq as monitoring system
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self.start_time = time.time()
        self._state = ExecutorState.new

        # Build the effective configuration, overlaying user settings when given.
        self.config = wrenconfig.default() if config is None \
            else wrenconfig.default(config)

        self.is_cf_cluster = is_cf_cluster()
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Explicit runtime arguments take precedence over the configured values.
        if runtime:
            self.config['pywren']['runtime'] = runtime
        if runtime_memory:
            self.config['pywren']['runtime_memory'] = int(runtime_memory)

        # Resolve the log level: the explicit argument wins; otherwise reuse
        # the level the pywren logger was already configured with (if any).
        self.log_level = log_level
        if not self.log_level and logger.getEffectiveLevel() != logging.WARNING:
            self.log_level = logging.getLevelName(logger.getEffectiveLevel())
        if self.log_level:
            os.environ["PYWREN_LOG_LEVEL"] = self.log_level
            if not self.is_cf_cluster:
                wrenlogging.default_config(self.log_level)

        # RabbitMQ monitoring: enable only if an AMQP URL is configured;
        # when monitoring is off, clear any URL from the config.
        self.rabbitmq_monitor = rabbitmq_monitor
        if not self.rabbitmq_monitor:
            self.config['rabbitmq']['amqp_url'] = None
        elif self.config['rabbitmq']['amqp_url']:
            os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
        else:
            self.rabbitmq_monitor = False

        sc = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(sc)

        cf_invoker = invokers.IBMCloudFunctionsInvoker(self.config)
        self.executor = Executor(cf_invoker, self.config, self.internal_storage)
        self.executor_id = self.executor.executor_id

        self.futures = []
コード例 #12
0
def function_handler(event):
    """
    Entry point executed inside the serverless action for a single call.

    Runs the user function via a `jobrunner` child process (bounded by the
    job's maximum runtime), merges the jobrunner's stats file into the
    response status, and finally publishes that status to RabbitMQ and/or
    stores it as `status.json` in the internal storage.

    :param event: dict with the invocation payload (config, object keys,
                  call/executor ids, log level, optional extra_env).
    """
    start_time = time.time()
    logger.debug("Action handler started")
    response_status = {'exception': False}
    response_status['host_submit_time'] = event['host_submit_time']
    response_status['start_time'] = start_time

    # Environment facts recorded for diagnostics in the final status blob.
    context_dict = {
        'ibm_cf_request_id': os.environ.get("__OW_ACTIVATION_ID"),
        'ibm_cf_python_version': os.environ.get("PYTHON_VERSION"),
    }

    config = event['config']
    storage_config = wrenconfig.extract_storage_config(config)

    log_level = event['log_level']
    wrenlogging.ow_config(log_level)

    # Identifiers and object-store keys are extracted before the try block
    # so the finally clause can always reference them.
    call_id = event['call_id']
    callgroup_id = event['callgroup_id']
    executor_id = event['executor_id']
    logger.info("Execution ID: {}/{}/{}".format(executor_id, callgroup_id, call_id))
    job_max_runtime = event.get("job_max_runtime", 590)  # default for CF
    status_key = event['status_key']
    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']
    output_key = event['output_key']
    extra_env = event.get('extra_env', {})

    response_status['call_id'] = call_id
    response_status['callgroup_id'] = callgroup_id
    response_status['executor_id'] = executor_id
    # response_status['func_key'] = func_key
    # response_status['data_key'] = data_key
    # response_status['output_key'] = output_key
    # response_status['status_key'] = status_key

    try:
        # Refuse to run payloads produced by a different PyWren version.
        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "PyWren version mismatch",
                            version.__version__, event['pywren_version'])

        # response_status['free_disk_bytes'] = free_disk_space("/tmp")

        # The environment must be updated BEFORE the jobrunner process is
        # started so the child inherits these variables.
        custom_env = {'PYWREN_CONFIG': json.dumps(config),
                      'PYWREN_EXECUTOR_ID':  executor_id,
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH),
                      'PYTHONUNBUFFERED': 'True'}

        os.environ.update(custom_env)
        os.environ.update(extra_env)

        # pass a full json blob
        jobrunner_config = {'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'python_module_path': PYTHON_MODULE_PATH,
                            'output_key': output_key,
                            'stats_filename': JOBRUNNER_STATS_FILENAME}

        # Remove a stats file possibly left over by a previous warm invocation.
        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        setup_time = time.time()
        response_status['setup_time'] = round(setup_time - start_time, 8)

        # Run the user function in a separate process so it can be killed
        # if it exceeds the maximum runtime or the memory limit.
        result_queue = multiprocessing.Queue()
        jr = jobrunner(jobrunner_config, result_queue)
        jr.daemon = True
        logger.info("Starting jobrunner process")
        jr.start()
        jr.join(job_max_runtime)
        response_status['exec_time'] = round(time.time() - setup_time, 8)

        if jr.is_alive():
            # If process is still alive after jr.join(job_max_runtime), kill it
            logger.error("Process exceeded maximum runtime of {} seconds".format(job_max_runtime))
            # Send the signal to all the process groups
            jr.terminate()
            raise Exception("OUTATIME",  "Process executed for too long and was killed")

        try:
            # Only 1 message is returned by jobrunner
            result_queue.get(block=False)
        except Exception:
            # If no message, this means that the process was killed due memory usage
            raise Exception("OUTOFMEMORY",  "Process exceeded maximum memory and was killed")

        # print(subprocess.check_output("find {}".format(PYTHON_MODULE_PATH), shell=True))
        # print(subprocess.check_output("find {}".format(os.getcwd()), shell=True))

        # Merge the jobrunner's "<key> <value>" stats lines into the status.
        # Values parse as float when possible; 'exception'/'exc_pickle_fail'/
        # 'result' lines carry Python literals and are eval'd back.
        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        response_status[key] = float(value)
                    except Exception:
                        response_status[key] = value
                    if key == 'exception' or key == 'exc_pickle_fail' \
                       or key == 'result':
                        response_status[key] = eval(value)

        # response_status['server_info'] = get_server_info()
        response_status.update(context_dict)
        response_status['end_time'] = time.time()

    except Exception as e:
        # internal runtime exceptions
        logger.error("There was an exception: {}".format(str(e)))
        response_status['end_time'] = time.time()
        response_status['exception'] = True

        # NOTE(review): plain traceback objects are not picklable with the
        # stdlib pickle — presumably a tblib-style pickling hook is installed
        # elsewhere in the runtime; confirm, or this line itself can raise.
        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        response_status['exc_info'] = str(pickled_exc)

    finally:
        # Status is always reported (unless STORE_STATUS disables it):
        # first best-effort to RabbitMQ (up to 5 attempts), then to storage.
        store_status = strtobool(os.environ.get('STORE_STATUS', 'True'))
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        dmpd_response_status = json.dumps(response_status)
        drs = sizeof_fmt(len(dmpd_response_status))

        if rabbit_amqp_url and store_status:
            status_sent = False
            output_query_count = 0
            while not status_sent and output_query_count < 5:
                output_query_count = output_query_count + 1
                try:
                    params = pika.URLParameters(rabbit_amqp_url)
                    connection = pika.BlockingConnection(params)
                    channel = connection.channel()
                    channel.queue_declare(queue=executor_id, auto_delete=True)
                    channel.basic_publish(exchange='', routing_key=executor_id,
                                          body=dmpd_response_status)
                    connection.close()
                    logger.info("Execution stats sent to rabbitmq - Size: {}".format(drs))
                    status_sent = True
                except Exception as e:
                    logger.error("Unable to send status to rabbitmq")
                    logger.error(str(e))
                    logger.info('Retrying to send stats to rabbitmq...')
                    time.sleep(0.2)
        if store_status:
            internal_storage = storage.InternalStorage(storage_config)
            logger.info("Storing execution stats - status.json - Size: {}".format(drs))
            internal_storage.put_data(status_key, dmpd_response_status)
コード例 #13
0
    def status(self,
               check_only=False,
               throw_except=True,
               internal_storage=None):
        """
        Return the status returned by the call.

        If the remote call raised an exception and `throw_except` is True,
        this method raises a corresponding exception locally.

        :param check_only: Return None immediately if job is not complete. Default False.
        :param throw_except: Reraise exception if call raised. Default True.
        :param internal_storage: Storage handler to poll cloud storage. Default None
                                 (one is created from this future's storage config).
        :return: The remote status dict of the call, or None.
        """
        # Fast path: the remote status was already fetched by a previous call.
        if self.ready or self.done:
            return self.run_status

        if internal_storage is None:
            internal_storage = storage.InternalStorage(self.storage_config)

        storage_utils.check_storage_path(internal_storage.get_storage_config(),
                                         self.storage_path)
        call_status = internal_storage.get_call_status(self.executor_id,
                                                       self.callgroup_id,
                                                       self.call_id)
        self.status_query_count += 1

        if check_only is True:
            if call_status is None:
                return None

        # Poll the object store until the status object appears.
        while call_status is None:
            time.sleep(self.GET_RESULT_SLEEP_SECS)
            call_status = internal_storage.get_call_status(
                self.executor_id, self.callgroup_id, self.call_id)
            self.status_query_count += 1

        self.invoke_status['status_done_timestamp'] = time.time()
        self.invoke_status['status_query_count'] = self.status_query_count

        self.run_status = call_status  # this is the remote status information

        total_time = format(
            round(call_status['end_time'] - call_status['start_time'], 2),
            '.2f')

        if call_status['exception'] is not None:
            # the wrenhandler had an exception; exception_args follows the
            # ("CODE", "message", ...) convention used by the remote handler
            self._set_state(JobState.error)
            exception_str = call_status['exception']
            exception_args = call_status['exception_args']

            log_msg = ('Executor ID {} Error in {} {} - Time: {} '
                       'seconds- Result: {}'.format(
                           self.executor_id, self.call_id, self.activation_id,
                           str(total_time),
                           exception_args[0] + " " + exception_args[1]))
            logger.debug(log_msg)

            if exception_args[0] == "WRONGVERSION":
                if throw_except:
                    raise Exception(
                        "Pywren version mismatch: remote "
                        "expected version {}, local library is version {}".
                        format(exception_args[2], exception_args[3]))
                return None
            elif exception_args[0] == "OUTATIME":
                if throw_except:
                    raise Exception("Process ran out of time - {} - {}".format(
                        self.call_id, self.activation_id))
                return None
            elif exception_args[0] == "OUTOFMEMORY":
                if throw_except:
                    raise Exception("Process exceeded maximum memory and was "
                                    "killed - {} - {}".format(
                                        self.call_id, self.activation_id))
                return None
            else:
                # BUGFIX: always record the remote exception. Previously
                # self._exception was assigned only when the status carried an
                # 'exception_traceback' key, so `raise self._exception` below
                # could raise AttributeError (or re-raise a stale exception
                # from an earlier call) when that key was missing.
                self._exception = Exception(exception_str, *exception_args)
                if throw_except:
                    raise self._exception
                return None

        log_msg = ('Executor ID {} Response from Function {} - Activation '
                   'ID: {} - Time: {} seconds'.format(self.executor_id,
                                                      self.call_id,
                                                      self.activation_id,
                                                      str(total_time)))
        logger.debug(log_msg)
        self._set_state(JobState.ready)

        # A map_reduce-style call may have spawned new futures remotely;
        # when present, eagerly fetch the result to materialize them.
        if 'new_futures' in call_status:
            unused_callgroup_id, total_new_futures = call_status[
                'new_futures'].split('/')
            if int(total_new_futures) > 0:
                self.result(throw_except=throw_except,
                            internal_storage=internal_storage)

        return self.run_status