def sync(job_id):
    """Poll the executors of a job for heartbeats until the job completes.

    On every pass the job's executor list is re-read with ``get_job``, each
    executor is asked for a heartbeat, and the reported fields are copied
    onto the executor record. The updated list is then written back with
    ``set_job``. Once the job's ``job_status`` is ``'completed'`` every
    executor is marked ``'offline'``, the list is stored one last time and
    the loop ends; otherwise the function sleeps five seconds and repeats.

    A failure while handling a single executor is logged and does not stop
    the pass. Any other exception is logged via ``log.exception`` instead of
    being propagated to the caller.

    Args:
        job_id: Identifier of the job whose executors are synced.
    """
    # (executor-record key, heartbeat-response key), applied in this order.
    heartbeat_fields = (
        ('timestamp', 'time'),
        ('agent_status', 'agent_status'),
        ('job_status', 'job_status'),
        ('job_id', 'job_id'),
        ('filename', 'filename'),
        ('submission_time', 'submission_time'),
    )

    log.info('initiate executors syncing')
    try:
        while True:
            executors = list(get_job(job_id)['executors'])
            for executor in executors:
                try:
                    response = heartbeat(executor, job_id)
                    if response:
                        for target_key, source_key in heartbeat_fields:
                            executor[target_key] = response[source_key]
                    else:
                        log.info(
                            f'no executor heartbeat for: {executor["name"]}')
                except Exception as e:
                    # Best effort: one failing executor must not prevent the
                    # others from being polled, but the failure should be
                    # visible in the log rather than silently dropped.
                    log.info(f'executor heartbeat failed: {e!r}')

            if get_job(job_id)['job_status'] == 'completed':
                # The job is done: flag every executor offline, persist, stop.
                for executor in executors:
                    executor['agent_status'] = 'offline'
                set_job(job_id, {'executors': executors})
                break

            set_job(job_id, {'executors': executors})
            time.sleep(5)
    except Exception as e:
        log.exception(f'error in sync: {e}')
def complete(self):
    """Mark this job as completed and notify the submitter and orchestrator.

    The job record is first updated with ``job_status='completed'`` and the
    current local time (stringified). The submitter and orchestrator
    endpoints stored on the job are then each sent a POST to ``/complete``,
    and finally the completion is reported through ``self.report_job``.

    Exceptions raised by the store helpers or by ``requests.post`` are not
    caught here.
    """
    completion_time = str(datetime.datetime.now())
    set_job(self._job_id, {'job_status': 'completed',
                           'completion_time': completion_time})

    # Read the stored job once instead of issuing one lookup per field.
    job = get_job(self._job_id)
    submitter_url = job['submitter_url']
    submitter_port = job['submitter_port']
    orchestrator_url = job['orchestrator_url']
    orchestrator_port = job['orchestrator_port']

    agent_name = get_global('agent_name')

    # NOTE(review): neither POST passes a timeout, so an unresponsive peer
    # blocks this call indefinitely -- consider adding one.
    requests.post(
        f'http://{submitter_url}:{submitter_port}/complete',
        params={
            'job_id': self._job_id,
            'completion_time': completion_time,
            'executor_name': agent_name,
            'executor_url': get_global('agent_url'),
            'executor_port': get_global('agent_port'),
        })
    requests.post(
        f'http://{orchestrator_url}:{orchestrator_port}/complete',
        params={
            'job_id': self._job_id,
            'agent_name': agent_name,
            'completion_time': completion_time,
        })

    self.report_job(self._job_id, 'completed')
def __init__(self, request) -> None:
    """Create a job from the query arguments of an incoming request.

    The job id is taken from the ``job_id`` argument, or a fresh
    ``uuid4`` hex string when that argument is absent. Every argument whose
    name appears in ``JOB_PARAMS`` is copied into ``self.config``, all
    request arguments are stored with ``set_job``, and the job status is set
    to ``'created'``.

    Args:
        request: Object exposing the query arguments as ``request.args``.

    Raises:
        Exception: If any step fails. The job status is set to ``'failed'``
            first and the triggering error is attached as ``__cause__``.
            If recording the failed status itself raises, that error
            propagates instead.
    """
    self.config = dict()
    try:
        jid = request.args.get('job_id')
        self.job_id = jid if jid is not None else uuid.uuid4().hex
        for arg in request.args.keys():
            if arg in JOB_PARAMS:
                self.config[arg] = request.args.get(arg)
        set_job(self.job_id, request.args)
        self.set('job_status', 'created')
    except Exception as e:
        self.set('job_status', 'failed')
        # Chain explicitly so the original traceback stays attached.
        raise Exception(f'failed to create job: {e}') from e
def complete():
    """Record a job completion and, when orchestrating, abort other executors.

    Reads ``job_id``, ``agent_name`` and the completion details from
    ``request.args``, stores them on the job with status ``'completed'``,
    and, if the job's ``role`` is ``'orchestrate'``, asks every executor
    other than the completing agent to abort.

    Returns:
        The stored parameters rendered with ``str()`` on success, or an
        empty dict when anything raised (the error is logged).
    """
    try:
        args = request.args
        job_id = args.get('job_id')
        completing_agent = args.get('agent_name')

        job_params = {'job_status': 'completed'}
        for field in ('completion_time', 'executor_name',
                      'executor_url', 'executor_port'):
            job_params[field] = args.get(field)
        set_job(job_id, job_params)

        # Only an orchestrating job has other executors to stop.
        if get_job(job_id)['role'] != 'orchestrate':
            return str(job_params)

        agent = Agent(job_id)
        for executor in list(get_job(job_id)['executors']):
            if executor['name'] == completing_agent:
                continue
            agent.request_abort(job_id, executor['url'], executor['port'])
        return str(job_params)
    except Exception:
        log.exception('unable to complete')
        return {}
def set(self, key: str, value: Any) -> None:
    """Store ``value`` under ``key`` locally and on the stored job.

    Args:
        key: Name of the setting to write.
        value: Value to record in ``self.config`` and pass to ``set_job``.
    """
    self.config.update({key: value})
    set_job(self.job_id, {key: value})
def check_abort(agent, job_id):
    """Acknowledge a pending abort request for a job, if there is one.

    When the job's ``job_status`` is ``'requested_abort'`` the status is
    changed to ``'aborted'`` and the abort is logged through ``agent.log``
    with ``report=True``.

    Args:
        agent: Object whose ``log`` method records the abort.
        job_id: Identifier of the job to inspect.

    Returns:
        True if an abort had been requested and was acknowledged,
        otherwise False.
    """
    abort_requested = get_job(job_id)['job_status'] == 'requested_abort'
    if not abort_requested:
        return False

    set_job(job_id, {'job_status': 'aborted'})
    agent.log('aborted', report=True, job_id=job_id)
    return True