def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   chunksize=None, worker_processes=None, extra_args=None,
                   obj_chunk_size=None, obj_chunk_number=None,
                   chunk_size=None, chunk_n=None):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """
    if chunk_size or chunk_n:
        print('>> WARNING: chunk_size and chunk_n parameters are deprecated, '
              'use obj_chunk_size and obj_chunk_number instead')
        obj_chunk_size = chunk_size
        obj_chunk_number = chunk_n

    host_job_meta = {'host_job_create_tstamp': time.time()}

    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    # Object processing functionality
    ppo = None
    if utils.is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, ppo = create_partitions(config, internal_storage,
                                              map_iterdata, obj_chunk_size,
                                              obj_chunk_number)
        host_job_meta['host_job_create_partitions_time'] = \
            round(time.time() - create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      chunksize=chunksize,
                      worker_processes=worker_processes,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta)

    if ppo:
        job.parts_per_object = ppo

    return job
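
# Design note: the deprecation shim above reports via a bare print(). A more
# idiomatic alternative (just a sketch, not what this code does) would be the
# standard warnings machinery, which callers can filter or turn into errors:
#
#   import warnings
#   warnings.warn('chunk_size and chunk_n are deprecated, use obj_chunk_size '
#                 'and obj_chunk_number instead',
#                 DeprecationWarning, stacklevel=2)
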
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   extra_args=None, obj_chunk_size=None, obj_chunk_number=None,
                   invoke_pool_threads=128):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}

    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    if config['lithops'].get('rabbitmq_monitor', False):
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        utils.create_rabbitmq_resources(rabbit_amqp_url, executor_id, job_id)

    # Object processing functionality
    parts_per_object = None
    if is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, parts_per_object = create_partitions(config, internal_storage,
                                                           map_iterdata, obj_chunk_size,
                                                           obj_chunk_number)
        host_job_meta['host_job_create_partitions_time'] = \
            round(time.time() - create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta,
                      invoke_pool_threads=invoke_pool_threads)

    if parts_per_object:
        job.parts_per_object = parts_per_object

    return job
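
# A minimal usage sketch for create_map_job(). This is illustrative only: in
# the real code path the FunctionExecutor assembles these arguments itself,
# and the literal IDs, memory size and timeout below are assumptions. Only
# keyword arguments common to both create_map_job() variants above are used.

def _example_create_map_job(config, internal_storage, runtime_meta):
    """Hypothetical caller showing the expected argument shapes."""

    def double(x):
        return x * 2

    return create_map_job(config=config,
                          internal_storage=internal_storage,
                          executor_id='a000',
                          job_id='M000',
                          map_function=double,
                          iterdata=[1, 2, 3],
                          runtime_meta=runtime_meta,
                          runtime_memory=256,
                          extra_env={},
                          include_modules=[],
                          exclude_modules=[],
                          execution_timeout=600)
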
def run(self):
    """
    Runs the function
    """
    # self.stats.write('worker_jobrunner_start_tstamp', time.time())
    logger.debug("Process started")
    result = None
    exception = False
    fn_name = None
    try:
        func = pickle.loads(self.job.func)
        data = pickle.loads(self.job.data)

        if strtobool(os.environ.get('__LITHOPS_REDUCE_JOB', 'False')):
            self._wait_futures(data)
        elif is_object_processing_function(func):
            self._load_object(data)

        self._fill_optional_args(func, data)

        fn_name = func.__name__ if inspect.isfunction(func) \
            or inspect.ismethod(func) else type(func).__name__

        self.prometheus.send_metric(name='function_start',
                                    value=time.time(),
                                    type='gauge',
                                    labels=(('job_id', self.job.job_key),
                                            ('call_id', '-'.join([self.job.job_key, self.job.call_id])),
                                            ('function_name', fn_name or 'undefined')))

        logger.info("Going to execute '{}()'".format(str(fn_name)))
        print('---------------------- FUNCTION LOG ----------------------')
        function_start_tstamp = time.time()
        result = func(**data)
        function_end_tstamp = time.time()
        print('----------------------------------------------------------')
        logger.info("Success function execution")

        self.stats.write('worker_func_start_tstamp', function_start_tstamp)
        self.stats.write('worker_func_end_tstamp', function_end_tstamp)
        self.stats.write('worker_func_exec_time',
                         round(function_end_tstamp - function_start_tstamp, 8))

        # Check for new futures
        if result is not None:
            if isinstance(result, ResponseFuture) or isinstance(result, FuturesList) \
                    or (type(result) == list and len(result) > 0
                        and isinstance(result[0], ResponseFuture)):
                self.stats.write('new_futures', pickle.dumps(result))
                result = None
            else:
                self.stats.write("result", True)
                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)
                self.stats.write('func_result_size', len(pickled_output))

        if result is None:
            logger.debug("No result to store")
            self.stats.write("result", False)
            self.stats.write('func_result_size', 0)

        # self.stats.write('worker_jobrunner_end_tstamp', time.time())

    except Exception:
        exception = True
        self.stats.write("exception", True)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print('----------------------- EXCEPTION !-----------------------')
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------')

        try:
            logger.debug("Pickling exception")
            pickled_exc = pickle.dumps((exc_type, exc_value, exc_traceback))
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
        except Exception as pickle_exception:
            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in
            # a try/catch block and more-carefully handle the exceptions
            # if any part of this save / test-reload fails
            self.stats.write("exc_pickle_fail", True)
            pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                        'exc_value': str(exc_value),
                                        'exc_traceback': exc_traceback,
                                        'pickle_exception': pickle_exception})
            pickle.loads(pickled_exc)  # this is just to make sure it can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
    finally:
        self.prometheus.send_metric(name='function_end',
                                    value=time.time(),
                                    type='gauge',
                                    labels=(('job_id', self.job.job_key),
                                            ('call_id', '-'.join([self.job.job_key, self.job.call_id])),
                                            ('function_name', fn_name or 'undefined')))

        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        if result is not None and store_result and not exception:
            output_upload_start_tstamp = time.time()
            logger.info("Storing function result - Size: {}".format(
                sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_end_tstamp = time.time()
            self.stats.write("worker_result_upload_time",
                             round(output_upload_end_tstamp - output_upload_start_tstamp, 8))
        self.jobrunner_conn.send("Finished")
        logger.info("Process finished")
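
# The save/test-reload pattern from the except block above, in isolation:
# pickle the exception, immediately unpickle it to prove it round-trips, and
# fall back to plain strings when the exception class does not. Note that
# vanilla pickle cannot serialize traceback objects (the worker presumably
# has traceback pickling support installed, e.g. via tblib), so this
# self-contained sketch round-trips only the exception type and value.

def _pickle_current_exception():
    """Hypothetical helper, not part of the JobRunner API."""
    import pickle
    import sys

    exc_type, exc_value, _ = sys.exc_info()
    try:
        payload = pickle.dumps((exc_type, exc_value))
        pickle.loads(payload)  # test-reload: prove it can be unpickled
    except Exception:
        # Fall back to string representations when the exception
        # class does not survive a pickle round-trip
        payload = pickle.dumps({'exc_type': str(exc_type),
                                'exc_value': str(exc_value)})
    return payload
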
def run(self):
    """
    Runs the function
    """
    # self.stats.write('worker_jobrunner_start_tstamp', time.time())
    logger.info("Started")
    result = None
    exception = False
    try:
        loaded_func_all = self._get_function_and_modules()
        self._save_modules(loaded_func_all['module_data'])
        function = self._unpickle_function(loaded_func_all['func'])
        data = self._load_data()

        if strtobool(os.environ.get('__PW_REDUCE_JOB', 'False')):
            self._wait_futures(data)
        elif is_object_processing_function(function):
            self._load_object(data)

        self._fill_optional_args(function, data)

        logger.info("Going to execute '{}()'".format(str(function.__name__)))
        print('---------------------- FUNCTION LOG ----------------------', flush=True)
        function_start_tstamp = time.time()
        result = function(**data)
        function_end_tstamp = time.time()
        print('----------------------------------------------------------', flush=True)
        logger.info("Success function execution")

        self.stats.write('worker_func_start_tstamp', function_start_tstamp)
        self.stats.write('worker_func_end_tstamp', function_end_tstamp)
        self.stats.write('worker_func_exec_time',
                         round(function_end_tstamp - function_start_tstamp, 8))

        # Check for new futures
        if result is not None:
            self.stats.write("result", True)
            if isinstance(result, ResponseFuture) or \
                    (type(result) == list and len(result) > 0
                        and isinstance(result[0], ResponseFuture)):
                self.stats.write('new_futures', True)

            logger.debug("Pickling result")
            output_dict = {'result': result}
            pickled_output = pickle.dumps(output_dict)
        else:
            logger.debug("No result to store")
            self.stats.write("result", False)

        # self.stats.write('worker_jobrunner_end_tstamp', time.time())

    except Exception:
        exception = True
        self.stats.write("exception", True)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print('----------------------- EXCEPTION !-----------------------', flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------', flush=True)

        try:
            logger.debug("Pickling exception")
            pickled_exc = pickle.dumps((exc_type, exc_value, exc_traceback))
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
        except Exception as pickle_exception:
            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in
            # a try/catch block and more-carefully handle the exceptions
            # if any part of this save / test-reload fails
            self.stats.write("exc_pickle_fail", True)
            pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                        'exc_value': str(exc_value),
                                        'exc_traceback': exc_traceback,
                                        'pickle_exception': pickle_exception})
            pickle.loads(pickled_exc)  # this is just to make sure it can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
    finally:
        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        if result is not None and store_result and not exception:
            output_upload_start_tstamp = time.time()
            logger.info("Storing function result - Size: {}".format(
                sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_end_tstamp = time.time()
            self.stats.write("worker_result_upload_time",
                             round(output_upload_end_tstamp - output_upload_start_tstamp, 8))
        self.jobrunner_conn.send("Finished")
        logger.info("Finished")
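
# Both run() variants gate behavior on string-valued environment flags
# ('__LITHOPS_REDUCE_JOB' / '__PW_REDUCE_JOB' and 'STORE_RESULT') parsed with
# strtobool. A minimal self-contained sketch of that convention; note that
# distutils.util.strtobool accepts 'y/yes/t/true/on/1' and their negatives,
# and that distutils is deprecated since Python 3.10 and removed in 3.12:

import os
from distutils.util import strtobool


def _store_result_enabled():
    """Hypothetical helper mirroring the STORE_RESULT convention above."""
    return bool(strtobool(os.environ.get('STORE_RESULT', 'True')))
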