def run_do(self):
    logger.info("{} jobs are running.".format(running_job_amount()))
    try:
        if running_job_amount() < MAX_CONCURRENT_JOB_RUN:
            # Take at most one waiting job from the queue per scheduling round.
            wait_jobs = get_job_from_queue(status="waiting", limit=1)
            if wait_jobs:
                wait_job = wait_jobs[0]
                run_job_id = wait_job.job_id
                try:
                    run_job_success = self.run_job(job_id=run_job_id, config=json.loads(wait_job.config))
                except Exception as e:
                    run_job_success = False
                    logger.exception(e)
                if run_job_success:
                    # Started successfully: mark the queue entry as ready.
                    update_job_queue(job_id=run_job_id,
                                     role=wait_job.role,
                                     party_id=wait_job.party_id,
                                     save_data={"status": "ready"})
                else:
                    # Failed to start: drop the entry so it does not block the queue.
                    pop_from_job_queue(job_id=run_job_id)
        logger.info("check waiting jobs done.")
        self.check_job()
    except Exception as e:
        logger.exception(e)
def run_do(self):
    logger.info("{} jobs are running.".format(running_job_amount()))
    if running_job_amount() < MAX_CONCURRENT_JOB_RUN:
        wait_jobs = get_job_from_queue(status="waiting", limit=1)
        if wait_jobs:
            # Mark the queue entry as ready, then launch the job.
            update_job_queue(job_id=wait_jobs[0].get("job_id"), update_data={"status": "ready"})
            self.run_job(wait_jobs[0].get("job_id"), json.loads(wait_jobs[0].get("config")))
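Neither version of run_do shows how it is invoked. The sketch below is a hypothetical driver loop, assuming the scheduler object exposing run_do is simply polled at a fixed interval; the name schedule_forever and the interval value are illustrative and not part of the original code.

import time

def schedule_forever(scheduler, interval_seconds=5):
    # Hypothetical driver (not in the original code): poll run_do() at a fixed
    # interval so waiting jobs are picked up whenever a running slot frees up.
    # The later run_do version already guards its own body with try/except.
    while True:
        scheduler.run_do()
        time.sleep(interval_seconds)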
def update_job(job_id):
    request_data = request.json
    update_job_by_id(job_id=job_id, update_data={"status": request_data.get("status")})
    update_job_queue(job_id=job_id, update_data={"status": request_data.get("status")})
    if request_data.get("status") in ["failed", "deleted"]:
        stop_job(job_id=job_id)
    if request_data.get("status") in ["failed", "deleted", "success"]:
        pop_from_job_queue(job_id=job_id)
    return get_json_result()
def start_workflow(job_id, module, role):
    _data = request.json
    _job_dir = get_job_directory(job_id)
    _party_id = str(_data['local']['party_id'])
    _method = _data['WorkFlowParam']['method']
    conf_path_dir = os.path.join(_job_dir, _method, module, role, _party_id)
    os.makedirs(conf_path_dir, exist_ok=True)
    conf_file_path = os.path.join(conf_path_dir, 'runtime_conf.json')
    # Persist this party's runtime configuration before launching the workflow.
    with open(conf_file_path, 'w+') as f:
        f.truncate()
        f.write(json.dumps(_data, indent=4))
        f.flush()
    if os.name == 'nt':
        # On Windows, hide the console window of the child process.
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
    else:
        startupinfo = None
    task_pid_path = os.path.join(_job_dir, 'pids')
    std_log = open(os.path.join(_job_dir, role + '.std.log'), 'w')
    progs = [
        "python3",
        os.path.join(file_utils.get_project_base_directory(), _data['CodePath']),
        "-j", job_id,
        "-c", os.path.abspath(conf_file_path)
    ]
    logger.info('Starting progs: {}'.format(" ".join(progs)))
    p = subprocess.Popen(progs, stdout=std_log, stderr=std_log, startupinfo=startupinfo)
    # Record the child pid so the job can be stopped later.
    os.makedirs(task_pid_path, exist_ok=True)
    with open(os.path.join(task_pid_path, role + ".pid"), 'w') as f:
        f.truncate()
        f.write(str(p.pid) + "\n")
        f.flush()
    job_data = dict()
    job_data["begin_date"] = datetime.datetime.now()
    job_data["status"] = "ready"
    with open(conf_file_path) as fr:
        config = json.load(fr)
    job_data.update(config)
    job_data["my_role"] = config.get("local", {}).get("role")
    save_job_info(job_id=job_id, **job_data)
    update_job_queue(job_id=job_id, update_data={"status": "ready"})
    return get_json_result(msg="success, pid is %s" % p.pid)
def start_workflow(job_id, module, role):
    _config = request.json
    _job_dir = get_job_directory(job_id)
    _party_id = str(_config['local']['party_id'])
    _method = _config['WorkFlowParam']['method']
    # Fill missing table settings from the default runtime configuration.
    default_runtime_dict = file_utils.load_json_conf('workflow/conf/default_runtime_conf.json')
    fill_runtime_conf_table_info(runtime_conf=_config, default_runtime_conf=default_runtime_dict)
    conf_file_path = new_runtime_conf(job_dir=_job_dir, method=_method, module=module,
                                      role=role, party_id=_party_id)
    with open(conf_file_path, 'w+') as f:
        f.truncate()
        f.write(json.dumps(_config, indent=4))
        f.flush()
    progs = [
        "python3",
        os.path.join(file_utils.get_project_base_directory(), _config['CodePath']),
        "-j", job_id,
        "-c", os.path.abspath(conf_file_path)
    ]
    # Process launch (log redirection, pid file) is delegated to run_subprocess.
    p = run_subprocess(job_dir=_job_dir, job_role=role, progs=progs)
    job_status = "start"
    job_data = dict()
    job_data["begin_date"] = datetime.datetime.now()
    job_data["status"] = job_status
    job_data.update(_config)
    job_data["pid"] = p.pid
    job_data["all_party"] = json.dumps(_config.get("role", {}))
    job_data["initiator"] = _config.get("JobParam", {}).get("initiator")
    save_job_info(job_id=job_id,
                  role=_config.get("local", {}).get("role"),
                  party_id=_config.get("local", {}).get("party_id"),
                  save_info=job_data,
                  create=True)
    update_job_queue(job_id=job_id,
                     role=role,
                     party_id=_party_id,
                     save_data={"status": job_status, "pid": p.pid})
    return get_json_result(data={'pid': p.pid}, job_id=job_id)
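The refactored start_workflow calls run_subprocess, which is not defined in this excerpt. The following is a minimal sketch reconstructed from the inline launching code of the earlier start_workflow; the signature matches the call site, but the exact body and file layout are assumptions.

import os
import subprocess

def run_subprocess(job_dir, job_role, progs):
    # Hedged reconstruction based on the earlier inline version: hide the console
    # window on Windows, redirect stdout/stderr to a per-role log file, and record
    # the child pid under <job_dir>/pids/<role>.pid.
    if os.name == 'nt':
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
    else:
        startupinfo = None
    std_log = open(os.path.join(job_dir, job_role + '.std.log'), 'w')
    task_pid_path = os.path.join(job_dir, 'pids')
    os.makedirs(task_pid_path, exist_ok=True)
    p = subprocess.Popen(progs, stdout=std_log, stderr=std_log, startupinfo=startupinfo)
    with open(os.path.join(task_pid_path, job_role + ".pid"), 'w') as f:
        f.write(str(p.pid) + "\n")
    return p

As in the earlier inline version, the log handle stays open for the lifetime of the child process rather than being closed by the caller.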
def update_job(job_id, role, party_id):
    request_data = request.json
    logger.info('job_id:{} role:{} party_id:{} status:{}'.format(
        job_id, role, party_id, request_data.get('status')))
    job_info = save_job_info(job_id=job_id, role=role, party_id=party_id,
                             save_info={"status": request_data.get("status")})
    if not job_info:
        logger.info('job_id {} may not be started by the Task Manager.'.format(job_id))
        return get_json_result(job_id=job_id, status=101,
                               msg='this task may not be started by the Task Manager.')
    update_job_queue(job_id=job_id, role=role, party_id=party_id,
                     save_data={"status": request_data.get("status")})
    if request_data.get("status") in ["success", "failed", "deleted"]:
        # Terminal states: remove the job from the queue.
        pop_from_job_queue(job_id=job_id)
    if is_job_initiator(job_info.initiator, PARTY_ID):
        # I am job initiator
        logger.info('i am job {} initiator'.format(job_id))
        # check job status across all parties
        jobs = query_job_by_id(job_id=job_id)
        job_status = set([job.status for job in jobs])
        do_stop_job = False
        if 'failed' in job_status or 'deleted' in job_status:
            do_stop_job = True
        elif len(job_status) == 1 and 'success' in job_status:
            do_stop_job = True
        if do_stop_job:
            stop_job(job_id=job_id)
    else:
        # send job status to initiator
        if not request_data.get('initiatorUpdate', False):
            request_data['initiatorUpdate'] = True
            federated_api(job_id=job_id,
                          method='POST',
                          url='/job/jobStatus/{}/{}/{}'.format(job_id, role, party_id),
                          party_id=job_info.initiator,
                          json_body=request_data)
    return get_json_result(job_id=job_id)
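is_job_initiator is also not defined in this excerpt. Judging from the call site, which passes job_info.initiator and the local PARTY_ID, it is presumably a plain identity check; a minimal sketch under that assumption:

def is_job_initiator(job_initiator, party_id):
    # Assumed behavior: the local party is the initiator if its party id matches
    # the initiator recorded on the job. Comparing as strings guards against
    # int/str mismatches between the runtime config and local settings.
    if not job_initiator:
        return False
    return str(job_initiator) == str(party_id)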