def _invoke_job(self, job):
    """
    Normal invocation. Use local threads to perform all
    the function invocations.
    """
    if self.remote_invoker:
        return self._invoke_job_remote(job)

    if self.should_run is False:
        self.running_workers = 0
        self.should_run = True
        self._start_async_invokers()

    if self.running_workers < self.workers:
        free_workers = self.workers - self.running_workers
        total_direct = free_workers * job.chunksize
        callids = range(job.total_calls)
        callids_to_invoke_direct = callids[:total_direct]
        callids_to_invoke_nondirect = callids[total_direct:]

        ci = len(callids_to_invoke_direct)
        cz = job.chunksize
        consumed_workers = ci // cz + (ci % cz > 0)  # ceiling division
        self.running_workers += consumed_workers

        logger.debug('ExecutorID {} | JobID {} - Free workers:'
                     ' {} - Going to run {} activations in {} workers'
                     .format(job.executor_id, job.job_id, free_workers,
                             len(callids_to_invoke_direct), consumed_workers))

        def _callback(future):
            future.result()

        invoke_futures = []
        executor = ThreadPoolExecutor(job.invoke_pool_threads)
        for call_ids_range in iterchunks(callids_to_invoke_direct, job.chunksize):
            future = executor.submit(self._invoke_task, job, call_ids_range)
            future.add_done_callback(_callback)
            invoke_futures.append(future)

        if self.sync:
            # Wait for all the direct invocations to finish
            for f in invoke_futures:
                f.result()

        # Put the rest of the call ids into the pending queue, to be
        # invoked as workers become free
        if callids_to_invoke_nondirect:
            logger.debug('ExecutorID {} | JobID {} - Putting remaining '
                         '{} function activations into pending queue'
                         .format(job.executor_id, job.job_id,
                                 len(callids_to_invoke_nondirect)))
            for call_ids_range in iterchunks(callids_to_invoke_nondirect, job.chunksize):
                self.pending_calls_q.put((job, call_ids_range))
    else:
        logger.debug('ExecutorID {} | JobID {} - Reached maximum {} '
                     'workers, queuing {} function activations'
                     .format(job.executor_id, job.job_id,
                             self.workers, job.total_calls))
        for call_ids_range in iterchunks(range(job.total_calls), job.chunksize):
            self.pending_calls_q.put((job, call_ids_range))
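
# All of these snippets lean on an `iterchunks` helper that is not defined
# here. A minimal sketch of the semantics its call sites assume (consecutive
# fixed-size slices of a sequence, last chunk possibly shorter); this is an
# illustration, not the actual implementation:
def iterchunks(sequence, chunksize):
    for i in range(0, len(sequence), chunksize):
        yield sequence[i:i + chunksize]

# Example: 7 call ids with chunksize 3 -> chunks of 3, 3 and 1. This also
# motivates the consumed_workers formula above: 7 // 3 + (7 % 3 > 0) == 3.
# list(iterchunks(range(7), 3)) -> [range(0, 3), range(3, 6), range(6, 7)]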

def run_job_process(job_payload, work_queue):
    """
    Process responsible for waiting for workers to become ready and
    submitting the individual tasks of the job to them.
    """
    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']
    workers = job_payload['worker_instances']

    for call_ids_range in iterchunks(call_ids, chunksize):
        task_payload = copy.deepcopy(job_payload)
        dbr = task_payload['data_byte_ranges']
        task_payload['call_ids'] = call_ids_range
        task_payload['data_byte_ranges'] = [dbr[int(call_id)] for call_id in call_ids_range]
        work_queue.put(task_payload)

    logger.info("Total tasks in {} work queue: {}".format(job_key, work_queue.qsize()))

    with ThreadPoolExecutor(len(workers)) as executor:
        for worker_info in workers:
            executor.submit(setup_worker, worker_info, work_queue, job_key)

    logger.info('All workers set up for job {}'.format(job_key))

    while not work_queue.empty():
        time.sleep(1)

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    logger.info('Finished job {} invocation.'.format(job_key))
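
# Self-contained illustration of how the loop above re-indexes
# data_byte_ranges per task. The payload is trimmed to the fields the loop
# touches and all values are made up; uses the iterchunks sketch from above.
import copy

demo_payload = {
    'call_ids': ['00000', '00001', '00002', '00003', '00004'],
    'chunksize': 2,
    'data_byte_ranges': [(0, 9), (10, 19), (20, 29), (30, 39), (40, 49)],
}

for ids_range in iterchunks(demo_payload['call_ids'], demo_payload['chunksize']):
    task = copy.deepcopy(demo_payload)
    task['call_ids'] = ids_range
    task['data_byte_ranges'] = [demo_payload['data_byte_ranges'][int(c)] for c in ids_range]
    print(task['call_ids'], task['data_byte_ranges'])
# ['00000', '00001'] [(0, 9), (10, 19)]
# ['00002', '00003'] [(20, 29), (30, 39)]
# ['00004'] [(40, 49)]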

def main_job(action, encoded_payload):
    logger.info("Lithops v{} - Starting Code Engine execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)
    setup_lithops_logger(payload['log_level'])

    if action == 'preinstalls':
        runtime_packages(payload)
        return {"Execution": "Finished"}

    job_index = int(os.environ['JOB_INDEX'])
    payload['JOB_INDEX'] = job_index
    logger.info("Action {}. Job Index {}".format(action, job_index))

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    os.environ['__LITHOPS_ACTIVATION_ID'] = act_id

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    call_ids = call_ids_ranges[job_index]
    data_byte_ranges = [payload['data_byte_ranges'][int(call_id)] for call_id in call_ids]

    payload['call_ids'] = call_ids
    payload['data_byte_ranges'] = data_byte_ranges

    function_handler(payload)

    return {"Execution": "Finished"}
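
# The array-job pattern above (repeated in the kubernetes and AWS Batch
# handlers below) ships the full call id list to every container and lets
# each container pick its own slice by index. A quick worked example, using
# the iterchunks sketch from earlier:
demo_call_ids = ['00000', '00001', '00002', '00003', '00004', '00005', '00006']
demo_ranges = list(iterchunks(demo_call_ids, 3))
# [['00000', '00001', '00002'], ['00003', '00004', '00005'], ['00006']]
print(demo_ranges[1])  # container with JOB_INDEX == 1 runs ['00003', '00004', '00005']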

def run_job(encoded_payload):
    logger.info("Lithops v{} - Starting kubernetes execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)
    setup_lithops_logger(payload['log_level'])

    job_key = payload['job_key']
    idgiver_ip = os.environ['IDGIVER_POD_IP']
    res = requests.get('http://{}:{}/getid/{}'.format(idgiver_ip, IDGIVER_PORT, job_key))
    job_index = int(res.text)

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    os.environ['__LITHOPS_ACTIVATION_ID'] = act_id
    os.environ['__LITHOPS_BACKEND'] = 'k8s'
    logger.info("Activation ID: {} - Job Index: {}".format(act_id, job_index))

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    call_ids = call_ids_ranges[job_index]
    data_byte_ranges = [payload['data_byte_ranges'][int(call_id)] for call_id in call_ids]

    payload['call_ids'] = call_ids
    payload['data_byte_ranges'] = data_byte_ranges

    function_handler(payload)

def run(self, job_payload):
    """
    Run a job described in job_payload.
    """
    job = SimpleNamespace(**job_payload)
    job.total_calls = len(job.call_ids)

    logger.info('ExecutorID {} | JobID {} - Starting function '
                'invocation - Total: {} activations'
                .format(job.executor_id, job.job_id, job.total_calls))
    logger.info('ExecutorID {} | JobID {} - Chunksize:'
                ' {} - Worker processes: {}'
                .format(job.executor_id, job.job_id,
                        job.chunksize, job.worker_processes))

    # One token per worker: invoker processes consume a token before
    # invoking each chunk of call ids
    for i in range(self.num_workers):
        self.token_bucket_q.put('#')

    for call_ids_range in iterchunks(job.call_ids, job.chunksize):
        self.pending_calls_q.put((job, call_ids_range))

    self.job_monitor.start_job_monitoring(job)

    invokers = []
    for inv_id in range(self.num_invokers):
        p = mp.Process(target=self._run_process, args=(inv_id,))
        p.daemon = True
        p.start()
        invokers.append(p)

    for p in invokers:
        p.join()
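
# _run_process is not shown above. A minimal sketch of the consumer side of
# the two-queue pattern it implies: each chunk costs one token from
# token_bucket_q (one free worker), so at most num_workers chunks run at
# once. The shutdown sentinel, the _invoke_task name and the token-refill
# path are assumptions, not the actual implementation:
def _run_process(self, inv_id):
    while True:
        job, call_ids_range = self.pending_calls_q.get()  # next chunk of call ids
        if job is None:                                   # assumed shutdown sentinel
            break
        self.token_bucket_q.get()        # block until a worker token is available
        self._invoke_task(job, call_ids_range)
        # whoever detects the worker finishing would return the token:
        # self.token_bucket_q.put('#')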

def run_job_worker(job_payload, work_queue):
    """
    Process responsible for filling the work queue with the individual
    tasks of a job and waiting until they have all been consumed.
    """
    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']

    for call_ids_range in iterchunks(call_ids, chunksize):
        task_payload = copy.deepcopy(job_payload)
        dbr = task_payload['data_byte_ranges']
        task_payload['call_ids'] = call_ids_range
        task_payload['data_byte_ranges'] = [dbr[int(call_id)] for call_id in call_ids_range]
        work_queue.put(task_payload)

    while not work_queue.empty():
        time.sleep(1)

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    logger.debug(f'Job process {job_key} finished')
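
# Several of these functions signal completion by touching a
# '<job_key>.done' file under JOBS_DIR. The consumer side only needs an
# existence check; a minimal sketch (the function name and polling interval
# are illustrative, not taken from the source):
import os
import time

def wait_for_job_done(jobs_dir, job_key, poll_interval=1):
    done = os.path.join(jobs_dir, job_key + '.done')
    while not os.path.exists(done):
        time.sleep(poll_interval)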

def run_job(encoded_payload):
    logger.info("Lithops v{} - Starting kubernetes execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)
    setup_lithops_logger(payload['log_level'])

    total_calls = payload['total_calls']
    job_key = payload['job_key']
    master_ip = os.environ['MASTER_POD_IP']

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    data_byte_ranges = payload['data_byte_ranges']

    job_finished = False
    while not job_finished:
        job_index = None

        # Ask the master pod for the next chunk index, retrying until
        # it responds
        while job_index is None:
            try:
                url = f'http://{master_ip}:{MASTER_PORT}/getid/{job_key}/{total_calls}'
                res = requests.get(url)
                job_index = int(res.text)
            except Exception:
                time.sleep(0.1)

        if job_index == -1:
            # All chunks have been handed out
            job_finished = True
            continue

        act_id = str(uuid.uuid4()).replace('-', '')[:12]
        os.environ['__LITHOPS_ACTIVATION_ID'] = act_id
        os.environ['__LITHOPS_BACKEND'] = 'k8s'
        logger.info("Activation ID: {} - Job Index: {}".format(act_id, job_index))

        call_ids = call_ids_ranges[job_index]
        dbr = [data_byte_ranges[int(call_id)] for call_id in call_ids]
        payload['call_ids'] = call_ids
        payload['data_byte_ranges'] = dbr

        function_handler(payload)
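
# The loop above assumes a small id-giver service on the master pod: each
# GET to /getid/<job_key>/<total_calls> returns the next unclaimed chunk
# index for that job, and -1 once every chunk has been handed out. A minimal
# Flask sketch of that contract (the real master implementation is not shown
# here, and the chunk size would come from the job payload, not a constant):
import math
import threading
import flask

app = flask.Flask(__name__)
counters = {}            # job_key -> next chunk index to hand out
lock = threading.Lock()
CHUNKSIZE = 2            # placeholder for illustration only

@app.route('/getid/<job_key>/<int:total_calls>')
def get_id(job_key, total_calls):
    total_chunks = math.ceil(total_calls / CHUNKSIZE)
    with lock:
        index = counters.get(job_key, 0)
        if index >= total_chunks:
            return '-1'  # all chunks taken; the worker pod exits its loop
        counters[job_key] = index + 1
        return str(index)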

def run_create():
    """
    Runs a given job remotely in workers, in create mode.
    """
    global BUDGET_KEEPER

    logger.info('Running job on worker VMs')

    job_payload = flask.request.get_json(force=True, silent=True)
    if job_payload and not isinstance(job_payload, dict):
        return error('The action did not receive a dictionary as an argument.')

    try:
        runtime = job_payload['runtime_name']
        verify_runtime_name(runtime)
    except Exception as e:
        return error(str(e))

    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']
    workers = job_payload['worker_instances']

    BUDGET_KEEPER.last_usage_time = time.time()
    BUDGET_KEEPER.update_config(job_payload['config']['standalone'])
    BUDGET_KEEPER.jobs[job_key] = 'running'

    # One worker VM per chunk of call ids
    with ThreadPoolExecutor(len(workers)) as executor:
        for call_ids_range in iterchunks(call_ids, chunksize):
            worker_info = workers.pop(0)
            executor.submit(run_job_on_worker, worker_info,
                            call_ids_range, copy.deepcopy(job_payload))

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    response = flask.jsonify({'activationId': act_id})
    response.status_code = 202
    return response
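
# Hypothetical client call against the handler above. The route name
# ('/run-create'), host and payload values are assumptions for illustration;
# the registration of run_create() is not shown in the source.
import requests

demo_job = {
    'runtime_name': 'my-runtime',
    'job_key': 'A001-0',
    'call_ids': ['00000', '00001', '00002', '00003'],
    'chunksize': 2,
    'worker_instances': [{'name': 'vm-0'}, {'name': 'vm-1'}],  # one VM per chunk
    'config': {'standalone': {}},
}

res = requests.post('http://127.0.0.1:8080/run-create', json=demo_job)
assert res.status_code == 202        # job accepted, runs asynchronously
print(res.json()['activationId'])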

def run(self, job):
    """
    Run a job described in job_description.
    """
    job.runtime_name = self.runtime_name

    # Drain the token bucket of any leftover tokens
    try:
        while True:
            self.token_bucket_q.get_nowait()
            self.ongoing_activations -= 1
    except Exception:
        pass

    self.prometheus.send_metric(name='job_total_calls',
                                value=job.total_calls,
                                labels=(('executor_id', job.executor_id),
                                        ('job_id', job.job_id),
                                        ('function_name', job.function_name)))

    if self.remote_invoker:
        # Remote invocation: use a single cloud function to perform all
        # the function invocations
        log_level = logger.getEffectiveLevel()
        logging.getLogger('lithops').setLevel(logging.CRITICAL)
        self.select_runtime(job.job_id, self.REMOTE_INVOKER_MEMORY)
        logging.getLogger('lithops').setLevel(log_level)
        log_msg = ('ExecutorID {} | JobID {} - Starting function '
                   'invocation: {}() - Total: {} activations'
                   .format(job.executor_id, job.job_id,
                           job.function_name, job.total_calls))
        logger.info(log_msg)
        self._invoke_remote(job)
        # th = Thread(target=self._invoke_remote, args=(job,), daemon=True)
        # th.start()
    else:
        # Normal invocation: use local threads to perform all
        # the function invocations
        try:
            if self.running_flag.value == 0:
                self.running_workers = 0
                self.running_flag.value = 1
                self._start_invoker_process()

            log_msg = ('ExecutorID {} | JobID {} - Starting function '
                       'invocation: {}() - Total: {} activations'
                       .format(job.executor_id, job.job_id,
                               job.function_name, job.total_calls))
            logger.info(log_msg)
            logger.debug('ExecutorID {} | JobID {} - Chunksize:'
                         ' {} - Worker processes: {}'
                         .format(job.executor_id, job.job_id,
                                 job.chunksize, job.worker_processes))

            if self.running_workers < self.workers:
                free_workers = self.workers - self.running_workers
                total_direct = free_workers * job.chunksize
                callids = range(job.total_calls)
                callids_to_invoke_direct = callids[:total_direct]
                callids_to_invoke_nondirect = callids[total_direct:]

                ci = len(callids_to_invoke_direct)
                cz = job.chunksize
                consumed_workers = ci // cz + (ci % cz > 0)  # ceiling division
                self.running_workers += consumed_workers

                logger.debug('ExecutorID {} | JobID {} - Free workers:'
                             ' {} - Going to run {} activations in {} workers'
                             .format(job.executor_id, job.job_id, free_workers,
                                     len(callids_to_invoke_direct), consumed_workers))

                def _callback(future):
                    future.result()

                executor = ThreadPoolExecutor(job.invoke_pool_threads)
                for call_ids_range in iterchunks(callids_to_invoke_direct, job.chunksize):
                    future = executor.submit(self._invoke, job, call_ids_range)
                    future.add_done_callback(_callback)
                time.sleep(0.1)

                # Put the rest of the call ids into the pending queue, to be
                # invoked as workers become free
                if callids_to_invoke_nondirect:
                    logger.debug('ExecutorID {} | JobID {} - Putting remaining '
                                 '{} function activations into pending queue'
                                 .format(job.executor_id, job.job_id,
                                         len(callids_to_invoke_nondirect)))
                    for call_ids_range in iterchunks(callids_to_invoke_nondirect, job.chunksize):
                        self.pending_calls_q.put((job, call_ids_range))
            else:
                logger.debug('ExecutorID {} | JobID {} - Reached maximum {} '
                             'workers, queuing {} function activations'
                             .format(job.executor_id, job.job_id,
                                     self.workers, job.total_calls))
                for call_ids_range in iterchunks(range(job.total_calls), job.chunksize):
                    self.pending_calls_q.put((job, call_ids_range))

            self.job_monitor.start_job_monitoring(job)
        except (KeyboardInterrupt, Exception) as e:
            self.stop()
            raise e

    log_file = os.path.join(LOGS_DIR, job.job_key + '.log')
    logger.info("ExecutorID {} | JobID {} - View execution logs at {}"
                .format(job.executor_id, job.job_id, log_file))

    # Create all the response futures for the job
    futures = []
    for i in range(job.total_calls):
        call_id = "{:05d}".format(i)
        fut = ResponseFuture(call_id, job, job.metadata.copy(), self.storage_config)
        fut._set_state(ResponseFuture.State.Invoked)
        futures.append(fut)

    return futures
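
# Worked example of the direct-vs-queued split in run() above (numbers are
# made up): with 10 workers of which 4 are busy and chunksize 2, a job of
# 20 calls invokes 12 calls directly on 6 workers and queues the other 8.
workers, running_workers = 10, 4
chunksize, total_calls = 2, 20

free_workers = workers - running_workers                     # 6
total_direct = free_workers * chunksize                      # 12
ci = len(range(total_calls)[:total_direct])                  # 12 direct calls
consumed_workers = ci // chunksize + (ci % chunksize > 0)    # 6 workers
queued = total_calls - ci                                    # 8 calls queued
print(free_workers, total_direct, consumed_workers, queued)  # 6 12 6 8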

# (Fragment from the AWS Batch entry point: the enclosing dispatch on
# `action`, and the definition of lithops_payload_json, are not shown in the
# source, so the 'run_job' guard below is a reconstruction.)
if action == 'run_job':
    lithops_payload = json.loads(lithops_payload_json)
    setup_lithops_logger(lithops_payload.get('log_level', logging.INFO))

    logger.info("Lithops v{} - Starting AWS Batch execution".format(__version__))

    job_index = int(os.environ.get('AWS_BATCH_JOB_ARRAY_INDEX', 0))
    lithops_payload['JOB_INDEX'] = job_index
    logger.info('Job index {}'.format(job_index))

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    os.environ['__LITHOPS_ACTIVATION_ID'] = act_id

    chunksize = lithops_payload['chunksize']
    call_ids_ranges = list(iterchunks(lithops_payload['call_ids'], chunksize))
    call_ids = call_ids_ranges[job_index]
    data_byte_ranges = [lithops_payload['data_byte_ranges'][int(call_id)]
                        for call_id in call_ids]

    lithops_payload['call_ids'] = call_ids
    lithops_payload['data_byte_ranges'] = data_byte_ranges

    function_handler(lithops_payload)
else:
    raise Exception('Unknown action {}'.format(action))