def reduce_function_wrapper(fut_list, internal_storage, storage, ibm_cos):
    """
    Wait for every future in *fut_list*, gather their results and invoke
    the user's reduce function over them.

    :param fut_list: list of ResponseFuture objects to wait on
    :param internal_storage: internal storage handler used while polling
    :param storage: storage client, injected into the reduce function only
        if its signature declares a 'storage' parameter
    :param ibm_cos: COS client, injected only if the signature declares
        an 'ibm_cos' parameter
    :return: whatever the reduce function returns

    NOTE(review): ``reduce_function`` and ``executor_id`` are free names in
    this body — presumably bound by an enclosing scope; confirm at call site.
    """
    logger.info('Waiting for results')
    # strtobool instead of eval(): evaluating an environment variable is an
    # arbitrary-code-execution hazard. Matches how STORE_RESULT is parsed
    # elsewhere in this file; truthiness is unchanged for 'True'/'False'.
    show_memory = strtobool(os.environ.get('SHOW_MEMORY_USAGE', 'False'))

    # Block until all futures finish, downloading their results as they land
    wait(fut_list, executor_id, internal_storage, download_results=True)
    # Only completed futures that did not spawn new futures contribute results
    results = [f.result() for f in fut_list if f.done and not f.futures]
    reduce_func_args = {'results': results}

    if show_memory:
        logger.debug("Memory usage after getting the results: {}"
                     .format(wrenutil.get_current_memory_usage()))

    # Inject the optional clients only when the user's reduce function
    # explicitly asks for them in its signature
    func_sig = inspect.signature(reduce_function)
    if 'storage' in func_sig.parameters:
        reduce_func_args['storage'] = storage
    if 'ibm_cos' in func_sig.parameters:
        reduce_func_args['ibm_cos'] = ibm_cos

    # Run reduce function
    return reduce_function(**reduce_func_args)
def run(self):
    """
    Runs the user function: fetches and unpickles it, loads its data,
    executes it, and uploads a pickled output dict to internal storage.

    The output dict is {'result': ..., 'success': bool}; on failure it
    carries the exception info instead. Always puts "Finished" on
    self.result_queue, even when the function raised.
    """
    logger.info("Started")
    # Initial output in case the job fails before producing a result,
    # so the finally-block upload always has something valid to store.
    output_dict = {'result': None, 'success': False}
    pickled_output = pickle.dumps(output_dict)

    try:
        self.internal_storage = storage.InternalStorage(self.storage_config)
        loaded_func_all = self._get_function_and_modules()
        self._save_modules(loaded_func_all['module_data'])
        function = self._unpickle_function(loaded_func_all['func'])
        data = self._load_data()
        data = self._create_storage_clients(function, data)

        if self.show_memory:
            logger.debug("Memory usage before call the function: {}".format(get_current_memory_usage()))

        logger.info("Function: Going to execute '{}()'".format(str(function.__name__)))
        print('------------------- FUNCTION LOG -------------------', flush=True)
        func_exec_time_t1 = time.time()
        result = function(**data)
        func_exec_time_t2 = time.time()
        print('----------------------------------------------------', flush=True)
        logger.info("Function: Success execution")

        if self.show_memory:
            logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

        self.stats.write('function_exec_time', round(func_exec_time_t2 - func_exec_time_t1, 8))
        output_dict = {'result': result, 'success': True}
        pickled_output = pickle.dumps(output_dict)

        # Check for new futures: record how many child invocations the
        # function spawned (0 when it returned plain data).
        if isinstance(result, ResponseFuture):
            callgroup_id = result.callgroup_id
            self.stats.write('new_futures', '{}/{}'.format(callgroup_id, 1))
        elif isinstance(result, list) and result and isinstance(result[0], ResponseFuture):
            callgroup_id = result[0].callgroup_id
            self.stats.write('new_futures', '{}/{}'.format(callgroup_id, len(result)))
        else:
            self.stats.write('new_futures', '{}/{}'.format(None, 0))

        if self.show_memory:
            logger.debug("Memory usage after output serialization: {}".format(get_current_memory_usage()))

    except Exception as e:
        print('------------------ EXCEPTION -------------------------')
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # traceback.print_tb(exc_traceback)

        # Shockingly often, modules like subprocess don't properly
        # call the base Exception.__init__, which results in them
        # being unpickleable. As a result, we actually wrap this in a
        # try/catch block and more-carefully handle the exceptions if
        # any part of this save / test-reload fails
        logger.error("There was an exception: {}".format(str(e)))
        try:
            pickled_output = pickle.dumps({'result': e,
                                           'exc_type': exc_type,
                                           'exc_value': exc_value,
                                           'exc_traceback': exc_traceback,
                                           'sys.path': sys.path,
                                           'success': False})
            # this is just to make sure they can be unpickled
            pickle.loads(pickled_output)
        except Exception as pickle_exception:
            # BUGFIX: traceback objects cannot be pickled, so storing the raw
            # exc_traceback here made this *fallback* dump raise as well, and
            # the secondary exception escaped the handler. Store its string
            # form instead so the fallback always succeeds.
            pickled_output = pickle.dumps({'result': str(e),
                                           'exc_type': str(exc_type),
                                           'exc_value': str(exc_value),
                                           'exc_traceback': str(exc_traceback),
                                           'exc_traceback_str': str(exc_traceback),
                                           'sys.path': sys.path,
                                           'pickle_fail': True,
                                           'pickle_exception': pickle_exception,
                                           'success': False})
    finally:
        # strtobool instead of eval(): evaluating an environment variable is
        # an arbitrary-code-execution hazard; truthiness is unchanged for the
        # 'True'/'False' values this flag takes, and the default stays True.
        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        if store_result:
            output_upload_timestamp_t1 = time.time()
            logger.info("Storing {} - Size: {}".format(self.output_key, sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_timestamp_t2 = time.time()
            self.stats.write("output_upload_time",
                             round(output_upload_timestamp_t2 - output_upload_timestamp_t1, 8))
        self.result_queue.put("Finished")
        logger.info("Finished")
def run(self):
    """
    Runs the user function: fetches and unpickles it, loads its data,
    executes it, and (when there is a result and no exception) uploads the
    pickled result to internal storage under self.output_key.

    Success/failure details are reported through self.stats ('result',
    'exception', 'exc_info', 'new_futures', timing keys). Always puts
    "Finished" on self.result_queue, even when the function raised.
    """
    logger.info("Started")
    # Defaults in case the job fails before producing a result
    result = None
    exception = False
    try:
        self.internal_storage = InternalStorage(self.storage_config)
        self.internal_storage.tmp_obj_prefix = self.output_key.rsplit('/', 1)[0]
        loaded_func_all = self._get_function_and_modules()
        self._save_modules(loaded_func_all['module_data'])
        function = self._unpickle_function(loaded_func_all['func'])
        data = self._load_data()
        data = self._create_storage_clients(function, data)

        if self.show_memory:
            logger.debug("Memory usage before call the function: {}".format(get_current_memory_usage()))

        logger.info("Function: Going to execute '{}()'".format(str(function.__name__)))
        print('---------------------- FUNCTION LOG ----------------------', flush=True)
        func_exec_time_t1 = time.time()
        result = function(**data)
        func_exec_time_t2 = time.time()
        print('----------------------------------------------------------', flush=True)
        logger.info("Function: Success execution")

        if self.show_memory:
            logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

        self.stats.write('function_exec_time', round(func_exec_time_t2 - func_exec_time_t1, 8))

        # Check for new futures: record how many child invocations the
        # function spawned (0 when it returned plain data).
        if result is not None:
            self.stats.write("result", True)
            if isinstance(result, ResponseFuture):
                callgroup_id = result.callgroup_id
                self.stats.write('new_futures', '{}/{}'.format(callgroup_id, 1))
            elif isinstance(result, list) and result and isinstance(result[0], ResponseFuture):
                callgroup_id = result[0].callgroup_id
                self.stats.write('new_futures', '{}/{}'.format(callgroup_id, len(result)))
            else:
                self.stats.write('new_futures', '{}/{}'.format(None, 0))

            logger.debug("Pickling result")
            output_dict = {'result': result}
            pickled_output = pickle.dumps(output_dict)

            if self.show_memory:
                logger.debug("Memory usage after output serialization: {}".format(get_current_memory_usage()))
        else:
            logger.debug("No result to store")
            self.stats.write("result", False)

    except Exception as e:
        exception = True
        self.stats.write("exception", True)
        print('----------------------- EXCEPTION !-----------------------')
        logger.error("There was an exception: {}".format(str(e)))
        print('----------------------------------------------------------', flush=True)

        if self.show_memory:
            logger.debug("Memory usage after call the function: {}".format(get_current_memory_usage()))

        try:
            logger.debug("Pickling exception")
            pickled_exc = pickle.dumps(sys.exc_info())
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
        except Exception as pickle_exception:
            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in a
            # try/catch block and more-carefully handle the exceptions if
            # any part of this save / test-reload fails
            logger.debug("Failed pickling exception: {}".format(str(pickle_exception)))
            self.stats.write("exc_pickle_fail", True)
            # NOTE(review): inside this nested handler sys.exc_info() reports
            # pickle_exception, not the original user exception `e` — the
            # original error is reachable via __context__; confirm intended.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            # BUGFIX: traceback objects cannot be pickled, so storing the raw
            # exc_traceback made this *fallback* dump raise as well and the
            # secondary exception escaped run(). Store its string form so the
            # fallback always succeeds.
            pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                        'exc_value': str(exc_value),
                                        'exc_traceback': str(exc_traceback),
                                        'pickle_exception': pickle_exception})
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
    finally:
        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        # Upload only when the function actually produced a result and
        # completed without raising; pickled_output is guaranteed bound here.
        if result is not None and store_result and not exception:
            output_upload_timestamp_t1 = time.time()
            logger.info("Storing function result - output.pickle - Size: {}".format(sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_timestamp_t2 = time.time()
            self.stats.write("output_upload_time",
                             round(output_upload_timestamp_t2 - output_upload_timestamp_t1, 8))
        self.result_queue.put("Finished")
        logger.info("Finished")