def invoke(executor_id, job_id, call_id, func_key, invoke_metadata, data_key, data_byte_range):
    """
    Assemble the payload for a single call, submit it through the internal
    compute client and return a future tracking the activation.

    ``self`` and ``job`` are not parameters of this function; they are free
    variables that must be provided by the surrounding scope.

    :param executor_id: executor ID of the call
    :param job_id: job ID of the call
    :param call_id: call ID of the call
    :param func_key: forwarded in the payload as 'func_key'
    :param invoke_metadata: dict updated in place with the activation ID,
        the invoke time and every payload entry except 'config'
    :param data_key: forwarded in the payload as 'data_key'
    :param data_byte_range: forwarded in the payload as 'data_byte_range'
    :return: ResponseFuture set to the invoked state
    :raises ValueError: if a key of job.extra_meta is already in the payload
    :raises Exception: if the invocation yields no activation ID
    """
    storage_prefix = self.storage_config['prefix']
    call_output_key = create_output_key(storage_prefix, executor_id, job_id, call_id)
    call_status_key = create_status_key(storage_prefix, executor_id, job_id, call_id)

    payload = {
        'config': self.config,
        'log_level': self.log_level,
        'func_key': func_key,
        'data_key': data_key,
        'output_key': call_output_key,
        'status_key': call_status_key,
        'task_execution_timeout': job.task_execution_timeout,
        'data_byte_range': data_byte_range,
        'executor_id': executor_id,
        'job_id': job_id,
        'call_id': call_id,
        'pywren_version': __version__,
    }

    if job.extra_env is not None:
        logger.debug("Extra environment vars {}".format(job.extra_env))
        payload['extra_env'] = job.extra_env

    if job.extra_meta is not None:
        # Extra metadata may add keys but must never shadow an existing one
        for meta_key, meta_value in job.extra_meta.items():
            if meta_key in payload:
                raise ValueError("Key {} already in dict".format(meta_key))
            payload[meta_key] = meta_value

    # Explicit overrides win over everything set so far (test injection hook)
    if job.overwrite_invoke_args is not None:
        payload.update(job.overwrite_invoke_args)

    submitted_at = time.time()
    payload['host_submit_time'] = submitted_at

    activation_id = self.internal_compute.invoke(
        job.runtime_name, job.runtime_memory, payload)
    if not activation_id:
        failure = "ExecutorID {} - Activation {} failed, therefore job is failed"
        raise Exception(failure.format(executor_id, call_id))

    invoke_metadata['activation_id'] = activation_id
    invoke_metadata['invoke_time'] = time.time() - submitted_at

    # Mirror the payload into the metadata, leaving the config out
    invoke_metadata.update(payload)
    del invoke_metadata['config']

    future = ResponseFuture(call_id, job_id, executor_id, activation_id,
                            self.storage_config, invoke_metadata)
    future._set_state(JobState.invoked)
    return future
def get_call_output(self, executor_id, job_id, call_id):
    """
    Fetch the stored output object of a single call.

    :param executor_id: executor ID of the call
    :param job_id: job ID of the call
    :param call_id: call ID of the call
    :return: Whatever the storage handler returns for the output key, or
        None when the key does not exist.
    """
    key = create_output_key(JOBS_PREFIX, executor_id, job_id, call_id)
    try:
        output = self.storage_handler.get_object(self.bucket, key)
    except StorageNoSuchKeyError:
        # A missing key is reported to the caller as None
        output = None
    return output
def invoke(executor_id, job_id, call_id, func_key, invoke_metadata, data_key, data_byte_range):
    """
    Build the payload of one call, invoke it through ``self.compute`` and
    return a future bound to the resulting activation.

    ``self`` and ``job`` are not parameters of this function; they are free
    variables that must be provided by the surrounding scope.

    :param executor_id: executor ID of the call
    :param job_id: job ID of the call
    :param call_id: call ID of the call
    :param func_key: forwarded in the payload as 'func_key'
    :param invoke_metadata: dict updated in place with the activation ID,
        the invoke time and every payload entry except 'config'
    :param data_key: forwarded in the payload as 'data_key'
    :param data_byte_range: forwarded in the payload as 'data_byte_range'
    :return: ResponseFuture set to the invoked state
    :raises Exception: if the invocation yields no activation ID
    """
    storage_prefix = self.storage_config['prefix']
    call_output_key = create_output_key(storage_prefix, executor_id, job_id, call_id)
    call_status_key = create_status_key(storage_prefix, executor_id, job_id, call_id)

    payload = {
        'config': self.config,
        'log_level': self.log_level,
        'func_key': func_key,
        'data_key': data_key,
        'output_key': call_output_key,
        'status_key': call_status_key,
        'execution_timeout': job.execution_timeout,
        'data_byte_range': data_byte_range,
        'executor_id': executor_id,
        'job_id': job_id,
        'call_id': call_id,
        'pywren_version': __version__,
    }

    # Extra environment variables are optional and only sent when present
    if job.extra_env is not None:
        logger.debug("Extra environment vars {}".format(job.extra_env))
        payload['extra_env'] = job.extra_env

    submitted_at = time.time()
    payload['host_submit_time'] = submitted_at

    activation_id = self.compute.invoke(job.runtime_name, job.runtime_memory, payload)
    if not activation_id:
        failure = ("ExecutorID {} | JobID {} - Retrying mechanism finished with no success. "
                   "Failed to invoke the job")
        raise Exception(failure.format(executor_id, job_id))

    invoke_metadata['activation_id'] = activation_id
    invoke_metadata['invoke_time'] = time.time() - submitted_at

    # Mirror the payload into the metadata, leaving the config out
    invoke_metadata.update(payload)
    del invoke_metadata['config']

    future = ResponseFuture(call_id, job_id, executor_id, activation_id,
                            self.storage_config, invoke_metadata)
    future._set_state(CallState.invoked)
    return future
def _invoke(self, job, call_id):
    """
    Perform the actual invocation of one call against a compute handler
    picked at random from ``self.compute_handlers``.

    :param job: job object supplying the keys, data ranges, runtime name
        and runtime memory used for the invocation
    :param call_id: call ID of the call; also used (as an int) to index
        ``job.data_ranges``
    :return: ``call_id`` when an activation ID was obtained; None when the
        invocation produced no activation ID, in which case ``(job, call_id)``
        is put on ``self.pending_calls_q``
    """
    executor_id = job.executor_id
    job_id = job.job_id

    payload = {
        'config': self.config,
        'log_level': self.log_level,
        'func_key': job.func_key,
        'data_key': job.data_key,
        'output_key': create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
        'status_key': create_status_key(JOBS_PREFIX, executor_id, job_id, call_id),
        'extra_env': job.extra_env,
        'execution_timeout': job.execution_timeout,
        'data_byte_range': job.data_ranges[int(call_id)],
        'executor_id': executor_id,
        'job_id': job_id,
        'call_id': call_id,
        'host_submit_time': time.time(),
        'pywren_version': __version__,
    }

    # Time the round trip of the invocation request
    began = time.time()
    handler = random.choice(self.compute_handlers)
    activation_id = handler.invoke(job.runtime_name, job.runtime_memory, payload)
    elapsed = time.time() - began

    if not activation_id:
        # No activation: hand the call back to the pending queue
        self.pending_calls_q.put((job, call_id))
        return None

    elapsed_text = format(round(elapsed, 3), '.3f')
    template = ('ExecutorID {} | JobID {} - Function invocation {} done! ({}s) - Activation'
                ' ID: {}')
    logger.debug(template.format(executor_id, job_id, call_id, elapsed_text, activation_id))

    return call_id
def invoke(executor_id, job_id, call_id, func_key, job_metadata, data_key, data_byte_range):
    """
    Build the payload of one call, invoke it through a compute handler picked
    at random from ``self.compute_handlers`` and return its future.

    ``self`` and ``job`` are not parameters of this function; they are free
    variables that must be provided by the surrounding scope.

    :param executor_id: executor ID of the call
    :param job_id: job ID of the call
    :param call_id: call ID of the call
    :param func_key: forwarded in the payload as 'func_key'
    :param job_metadata: dict that receives the 'activation_id' entry and is
        handed to the returned future
    :param data_key: forwarded in the payload as 'data_key'
    :param data_byte_range: forwarded in the payload as 'data_byte_range'
    :return: ResponseFuture set to the Invoked state
    :raises Exception: if the invocation yields no activation ID
    """
    storage_prefix = self.storage_config['prefix']
    call_output_key = create_output_key(storage_prefix, executor_id, job_id, call_id)
    call_status_key = create_status_key(storage_prefix, executor_id, job_id, call_id)

    payload = {
        'config': self.pywren_config,
        'log_level': self.log_level,
        'func_key': func_key,
        'data_key': data_key,
        'output_key': call_output_key,
        'status_key': call_status_key,
        'extra_env': job.extra_env,
        'execution_timeout': job.execution_timeout,
        'data_byte_range': data_byte_range,
        'executor_id': executor_id,
        'job_id': job_id,
        'call_id': call_id,
        'host_submit_time': time.time(),
        'pywren_version': __version__,
    }

    handler = random.choice(self.compute_handlers)
    activation_id = handler.invoke(job.runtime_name, job.runtime_memory, payload)

    if activation_id:
        job_metadata['activation_id'] = activation_id
        future = ResponseFuture(executor_id, job_id, call_id,
                                self.storage_config, job_metadata)
        future._set_state(ResponseFuture.State.Invoked)
        return future

    failure = ("ExecutorID {} | JobID {} - Retrying mechanism finished with no success. "
               "Failed to invoke the job")
    raise Exception(failure.format(executor_id, job_id))
def function_handler(event):
    """
    Entry point executed inside the runtime for a single call.

    Reads the invocation payload, runs a JobRunner in a separate process
    (or in a thread when '__PW_LOCAL_EXECUTION' is set), enforces the
    execution timeout, collects the stats written by the JobRunner and
    reports '__init__' / '__end__' status events through CallStatus.

    Errors raised inside the guarded section are not propagated: they are
    printed, recorded in the call status ('exception' and 'exc_info') and
    the '__end__' event is still sent.

    :param event: invocation payload (dict). Keys read here: 'log_level',
        'config', 'call_id', 'job_id', 'executor_id', 'runtime_name',
        'runtime_memory', 'execution_timeout', 'func_key', 'data_key',
        'data_byte_range', 'host_submit_tstamp', 'pywren_version' and the
        optional 'extra_env'.
    """
    start_tstamp = time.time()

    log_level = event['log_level']
    cloud_logging_config(log_level)
    logger.debug("Action handler started")

    extra_env = event.get('extra_env', {})
    os.environ.update(extra_env)

    os.environ.update({'PYWREN_FUNCTION': 'True',
                       'PYTHONUNBUFFERED': 'True'})

    config = event['config']
    call_id = event['call_id']
    job_id = event['job_id']
    executor_id = event['executor_id']
    exec_id = "{}/{}/{}".format(executor_id, job_id, call_id)
    logger.info("Execution-ID: {}".format(exec_id))

    runtime_name = event['runtime_name']
    runtime_memory = event['runtime_memory']
    execution_timeout = event['execution_timeout']
    logger.debug("Runtime name: {}".format(runtime_name))
    logger.debug("Runtime memory: {}MB".format(runtime_memory))
    logger.debug("Function timeout: {}s".format(execution_timeout))

    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    call_status = CallStatus(config, internal_storage)
    call_status.response['host_submit_tstamp'] = event['host_submit_tstamp']
    call_status.response['start_tstamp'] = start_tstamp
    context_dict = {
        'python_version': os.environ.get("PYTHON_VERSION"),
        'call_id': call_id,
        'job_id': job_id,
        'executor_id': executor_id,
        'activation_id': os.environ.get('__PW_ACTIVATION_ID')
    }
    call_status.response.update(context_dict)

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    # Defined before the try block so the cleanup in ``finally`` can tell
    # whether a memory monitor was ever started.
    memory_monitor = None
    mm_handler_conn = None

    try:
        if version.__version__ != event['pywren_version']:
            msg = ("PyWren version mismatch. Host version: {} - Runtime version: {}"
                   .format(event['pywren_version'], version.__version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        custom_env = {
            'PYWREN_CONFIG': json.dumps(config),
            'PYWREN_EXECUTION_ID': exec_id,
            'PYTHONPATH': "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH)
        }
        os.environ.update(custom_env)

        jobrunner_stats_dir = os.path.join(STORAGE_FOLDER,
                                           storage_config['bucket'],
                                           JOBS_PREFIX, executor_id,
                                           job_id, call_id)
        os.makedirs(jobrunner_stats_dir, exist_ok=True)
        jobrunner_stats_filename = os.path.join(jobrunner_stats_dir,
                                                'jobrunner.stats.txt')

        jobrunner_config = {
            'pywren_config': config,
            'call_id': call_id,
            'job_id': job_id,
            'executor_id': executor_id,
            'func_key': func_key,
            'data_key': data_key,
            'log_level': log_level,
            'data_byte_range': data_byte_range,
            'output_key': create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
            'stats_filename': jobrunner_stats_filename
        }

        if show_memory_peak:
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker,
                                    args=(mm_conn, ))
            memory_monitor.start()

        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(jobrunner_config, jobrunner_conn, internal_storage)
        logger.debug('Starting JobRunner process')
        local_execution = strtobool(
            os.environ.get('__PW_LOCAL_EXECUTION', 'False'))
        jrp = Thread(target=jobrunner.run) if local_execution else Process(
            target=jobrunner.run)
        jrp.start()

        jrp.join(execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # Still alive after join(execution_timeout): kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(
                sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error(
                'No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by jobrunner when it finishes.
            # If no message, this means that the jobrunner process was killed.
            # 99% of times the jobrunner is killed due an OOM, so we assume here an OOM.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for line in fid:
                    key, value = line.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in [
                            'exception', 'exc_pickle_fail', 'result',
                            'new_futures'
                    ]:
                        # SECURITY NOTE(review): eval() executes whatever the
                        # stats file contains. Kept for compatibility; confirm
                        # the file can only be written by the JobRunner.
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------',
              flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------',
              flush=True)
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(
            pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.response['end_tstamp'] = time.time()
        call_status.send('__end__')

        # Use a default so a variable that was removed in the meantime
        # cannot raise KeyError out of the cleanup.
        for key in extra_env:
            os.environ.pop(key, None)

        # Error paths (timeout, failures before the normal shutdown) skip the
        # 'STOP' message above; without this the monitor thread would keep
        # running after the handler returns.
        if memory_monitor is not None and memory_monitor.is_alive():
            mm_handler_conn.send('STOP')
            memory_monitor.join()

        logger.info("Finished")
def function_handler(event):
    """
    Entry point executed inside the runtime for a single call.

    Reads the invocation payload, runs a JobRunner in a separate process
    (or in a thread when 'LOCAL_EXECUTION' is set), enforces the execution
    timeout, collects the stats written by the JobRunner and reports
    '__init__' / '__end__' status events through CallStatus.

    Errors raised inside the guarded section are not propagated: they are
    printed, recorded in the call status ('exception' and 'exc_info') and
    the '__end__' event is still sent.

    :param event: invocation payload (dict). Keys read here: 'log_level',
        'config', 'call_id', 'job_id', 'executor_id', 'execution_timeout',
        'func_key', 'data_key', 'data_byte_range', 'host_submit_time',
        'pywren_version' and the optional 'extra_env'.
    """
    start_time = time.time()

    log_level = event['log_level']
    cloud_logging_config(log_level)
    logger.debug("Action handler started")

    extra_env = event.get('extra_env', {})
    os.environ.update(extra_env)

    config = event['config']

    call_status = CallStatus(config)
    call_status.response['host_submit_time'] = event['host_submit_time']
    call_status.response['start_time'] = start_time

    context_dict = {
        'python_version': os.environ.get("PYTHON_VERSION"),
    }

    call_id = event['call_id']
    job_id = event['job_id']
    executor_id = event['executor_id']
    exec_id = "{}/{}/{}".format(executor_id, job_id, call_id)
    logger.info("Execution ID: {}".format(exec_id))

    execution_timeout = event['execution_timeout']
    logger.debug("Set function execution timeout to {}s".format(execution_timeout))

    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']

    call_status.response['call_id'] = call_id
    call_status.response['job_id'] = job_id
    call_status.response['executor_id'] = executor_id
    call_status.response['activation_id'] = os.environ.get('__OW_ACTIVATION_ID')

    try:
        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "PyWren version mismatch",
                            version.__version__, event['pywren_version'])

        # send init status event
        call_status.send('__init__')

        custom_env = {'PYWREN_CONFIG': json.dumps(config),
                      'PYWREN_FUNCTION': 'True',
                      'PYWREN_EXECUTION_ID': exec_id,
                      'PYWREN_STORAGE_BUCKET': config['pywren']['storage_bucket'],
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH),
                      'PYTHONUNBUFFERED': 'True'}
        os.environ.update(custom_env)

        jobrunner_stats_dir = os.path.join(STORAGE_BASE_DIR, executor_id, job_id, call_id)
        os.makedirs(jobrunner_stats_dir, exist_ok=True)
        jobrunner_stats_filename = os.path.join(jobrunner_stats_dir, 'jobrunner.stats.txt')

        jobrunner_config = {'pywren_config': config,
                            'call_id': call_id,
                            'job_id': job_id,
                            'executor_id': executor_id,
                            'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'output_key': create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
                            'stats_filename': jobrunner_stats_filename}

        setup_time = time.time()
        call_status.response['setup_time'] = round(setup_time - start_time, 8)

        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(jobrunner_config, jobrunner_conn)
        logger.debug('Starting JobRunner process')
        local_execution = strtobool(os.environ.get('LOCAL_EXECUTION', 'False'))
        if local_execution:
            jrp = Thread(target=jobrunner.run)
        else:
            jrp = Process(target=jobrunner.run)
        jrp.daemon = True
        jrp.start()
        jrp.join(execution_timeout)
        logger.debug('JobRunner process finished')
        call_status.response['exec_time'] = round(time.time() - setup_time, 8)

        if jrp.is_alive():
            # Still alive after join(execution_timeout): kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Jobrunner process exceeded maximum time of {} '
                   'seconds and was killed'.format(execution_timeout))
            raise Exception('OUTATIME', msg)

        # recv() raises EOFError only once every handle to the other end is
        # closed. This process still holds jobrunner_conn, so after a killed
        # JobRunner a bare recv() would block forever instead of failing.
        # Poll first and only read when a message is actually waiting.
        completed = False
        if handler_conn.poll():
            try:
                handler_conn.recv()
                completed = True
            except EOFError:
                pass

        if not completed:
            logger.error('No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by jobrunner when it finishes.
            # If no message, this means that the jobrunner process was killed.
            # 99% of times the jobrunner is killed due an OOM, so we assume here an OOM.
            msg = 'Jobrunner process exceeded maximum memory and was killed'
            raise Exception('OUTOFMEMORY', msg)

        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for line in fid:
                    key, value = line.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in ['exception', 'exc_pickle_fail', 'result', 'new_futures']:
                        # SECURITY NOTE(review): eval() executes whatever the
                        # stats file contains. Kept for compatibility; confirm
                        # the file can only be written by the JobRunner.
                        call_status.response[key] = eval(value)

        call_status.response.update(context_dict)
        call_status.response['end_time'] = time.time()

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------', flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------', flush=True)
        call_status.response['end_time'] = time.time()
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.send('__end__')
        logger.info("Finished")