def dtable(table_func):
    config = request.json
    if table_func == 'tableInfo':
        table_name, namespace = get_table_info(config=config,
                                               create=config.get('create', False))
        dtable = storage.get_data_table(name=table_name, namespace=namespace)
        # count the keys only if the table actually exists
        table_key_count = dtable.count() if dtable else 0
        return get_json_result(data={'table_name': table_name,
                                     'namespace': namespace,
                                     'count': table_key_count})
    else:
        return get_json_result()

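# A minimal client-side sketch of querying this endpoint, assuming it is mounted
# as a POST route ending in /tableInfo (host, port, and URL prefix below are
# hypothetical; the payload keys follow the config contract read above):
#
#   import requests
#   resp = requests.post('http://127.0.0.1:9380/data/tableInfo',
#                        json={'table_name': 'breast_a', 'namespace': 'fate_data'})
#   print(resp.json())  # expected to include table_name, namespace, and count
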
def stop_workflow(job_id, role, party_id):
    _job_dir = get_job_directory(job_id)
    task_pid_path = os.path.join(_job_dir, 'pids')
    if os.path.isdir(task_pid_path):
        for pid_file in os.listdir(task_pid_path):
            try:
                if not pid_file.endswith('.pid'):
                    continue
                with open(os.path.join(task_pid_path, pid_file), 'r') as f:
                    pids = f.read().split('\n')
                    for pid in pids:
                        try:
                            if len(pid) == 0:
                                continue
                            logger.debug("terminating process pid:{} {}".format(pid, pid_file))
                            p = psutil.Process(int(pid))
                            # kill the whole process tree: children first, then the parent
                            for child in p.children(recursive=True):
                                child.kill()
                            p.kill()
                        except NoSuchProcess:
                            continue
            except Exception:
                logger.exception("error")
                continue
    set_job_failed(job_id=job_id, role=role, party_id=party_id)
    pop_from_job_queue(job_id=job_id)
    clean_job(job_id=job_id)
    return get_json_result(job_id=job_id)

def publish_model_online():
    request_config = request.json
    if not request_config.get('servings'):
        # default to all of this party's serving instances
        request_config['servings'] = SERVINGS
    publish_model.publish_online(config_data=request_config)
    return get_json_result()

def update_job(job_id, role, party_id):
    request_data = request.json
    logger.info('job_id:{} role:{} party_id:{} status:{}'.format(
        job_id, role, party_id, request_data.get('status')))
    job_info = save_job_info(job_id=job_id, role=role, party_id=party_id,
                             save_info={"status": request_data.get("status")})
    if not job_info:
        logger.info('job_id {} may not have been started by the Task Manager.'.format(job_id))
        return get_json_result(job_id=job_id, status=101,
                               msg='this task may not have been started by the Task Manager.')
    update_job_queue(job_id=job_id, role=role, party_id=party_id,
                     save_data={"status": request_data.get("status")})
    if request_data.get("status") in ["success", "failed", "deleted"]:
        pop_from_job_queue(job_id=job_id)
    if is_job_initiator(job_info.initiator, PARTY_ID):
        # this party initiated the job: check the status reported by all parties
        logger.info('I am the initiator of job {}'.format(job_id))
        jobs = query_job_by_id(job_id=job_id)
        job_status = set([job.status for job in jobs])
        do_stop_job = False
        if 'failed' in job_status or 'deleted' in job_status:
            # any failure or deletion stops the whole job
            do_stop_job = True
        elif len(job_status) == 1 and 'success' in job_status:
            # every party reported success, so the job is done
            do_stop_job = True
        if do_stop_job:
            stop_job(job_id=job_id)
    else:
        # forward the status update to the job initiator, at most once
        if not request_data.get('initiatorUpdate', False):
            request_data['initiatorUpdate'] = True
            federated_api(job_id=job_id, method='POST',
                          url='/job/jobStatus/{}/{}/{}'.format(job_id, role, party_id),
                          party_id=job_info.initiator,
                          json_body=request_data)
    return get_json_result(job_id=job_id)

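# Illustrative status payload for the handler above (key names mirror what
# update_job reads; values are examples only). A non-initiator party posts:
#
#   {"status": "failed"}
#
# and the handler re-posts the same body to the initiator with the loop guard set:
#
#   {"status": "failed", "initiatorUpdate": true}
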
def load_model():
    request_config = request.json
    _job_id = generate_job_id()
    # collect the distinct party ids across all roles
    all_party = set()
    for _party_ids in request_config.get('role').values():
        all_party.update(set(_party_ids))
    for _party_id in all_party:
        st, msg = federated_api(job_id=_job_id, method='POST',
                                url='/model/load/do',
                                party_id=_party_id,
                                json_body=request_config)
    return get_json_result(job_id=_job_id)

def download_upload(data_func):
    request_config = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, request_config))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)
    module = data_func
    if module == "upload":
        # resolve relative upload paths against the project base directory
        if not os.path.isabs(request_config.get("file", "")):
            request_config["file"] = os.path.join(file_utils.get_project_base_directory(),
                                                  request_config["file"])
    try:
        request_config["work_mode"] = request_config.get('work_mode', WORK_MODE)
        table_name, namespace = dtable_utils.get_table_info(config=request_config,
                                                            create=(module == 'upload'))
        if not table_name or not namespace:
            return get_json_result(status=102, msg='no table name and namespace')
        request_config['table_name'] = table_name
        request_config['namespace'] = namespace
        conf_file_path = new_runtime_conf(job_dir=_job_dir, method=data_func, module=module,
                                          role=request_config.get('local', {}).get("role"),
                                          party_id=request_config.get('local', {}).get("party_id", PARTY_ID))
        file_utils.dump_json_conf(request_config, conf_file_path)
        # the download module additionally takes the job id on its command line
        if module == "download":
            progs = ["python3",
                     os.path.join(file_utils.get_project_base_directory(),
                                  JOB_MODULE_CONF[module]["module_path"]),
                     "-j", _job_id,
                     "-c", conf_file_path]
        else:
            progs = ["python3",
                     os.path.join(file_utils.get_project_base_directory(),
                                  JOB_MODULE_CONF[module]["module_path"]),
                     "-c", conf_file_path]
        p = run_subprocess(job_dir=_job_dir, job_role=data_func, progs=progs)
        return get_json_result(job_id=_job_id,
                               data={'pid': p.pid,
                                     'table_name': request_config['table_name'],
                                     'namespace': request_config['namespace']})
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=-104, msg="failed", job_id=_job_id)

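# A hedged example of an upload request body (only keys this handler actually
# reads are shown; the file path and table identifiers are illustrative). A
# relative "file" is resolved against the project base directory:
#
#   {
#       "file": "examples/data/breast_a.csv",
#       "work_mode": 0,
#       "local": {"role": "guest", "party_id": 10000},
#       "namespace": "fate_data",
#       "table_name": "breast_a"
#   }
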
def start_workflow(job_id, module, role):
    _config = request.json
    _job_dir = get_job_directory(job_id)
    _party_id = str(_config['local']['party_id'])
    _method = _config['WorkFlowParam']['method']
    default_runtime_dict = file_utils.load_json_conf('workflow/conf/default_runtime_conf.json')
    fill_runtime_conf_table_info(runtime_conf=_config,
                                 default_runtime_conf=default_runtime_dict)
    conf_file_path = new_runtime_conf(job_dir=_job_dir, method=_method, module=module,
                                      role=role, party_id=_party_id)
    with open(conf_file_path, 'w+') as f:
        f.truncate()
        f.write(json.dumps(_config, indent=4))
        f.flush()
    progs = ["python3",
             os.path.join(file_utils.get_project_base_directory(), _config['CodePath']),
             "-j", job_id,
             "-c", os.path.abspath(conf_file_path)]
    p = run_subprocess(job_dir=_job_dir, job_role=role, progs=progs)
    # record the new job in storage and in the job queue
    job_status = "start"
    job_data = dict()
    job_data["begin_date"] = datetime.datetime.now()
    job_data["status"] = job_status
    job_data.update(_config)
    job_data["pid"] = p.pid
    job_data["all_party"] = json.dumps(_config.get("role", {}))
    job_data["initiator"] = _config.get("JobParam", {}).get("initiator")
    save_job_info(job_id=job_id,
                  role=_config.get("local", {}).get("role"),
                  party_id=_config.get("local", {}).get("party_id"),
                  save_info=job_data, create=True)
    update_job_queue(job_id=job_id, role=role, party_id=_party_id,
                     save_data={"status": job_status, "pid": p.pid})
    return get_json_result(data={'pid': p.pid}, job_id=job_id)

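# For reference, the subprocess spawned above amounts to a command of roughly
# this shape (paths are illustrative; CodePath comes from the request body):
#
#   python3 <project_base>/<CodePath> -j <job_id> -c <abs path to runtime conf>
#
# run_subprocess is assumed to record the child pid under the job directory,
# which is what stop_workflow later reads back from the 'pids' files.
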
def load_model():
    request_config = request.json
    _job_id = generate_job_id()
    if request_config.get('gen_table_info', False):
        publish_model.generate_model_info(request_config)
    for role_name, role_partys in request_config.get("role").items():
        # skip the arbiter role when loading models
        if role_name == 'arbiter':
            continue
        for _party_id in role_partys:
            request_config['local'] = {'role': role_name, 'party_id': _party_id}
            st, msg = federated_api(job_id=_job_id, method='POST',
                                    url='/model/load/do',
                                    party_id=_party_id,
                                    json_body=request_config)
    return get_json_result(job_id=_job_id)

def stop_job(job_id):
    _job_dir = get_job_directory(job_id)
    all_party = []
    # gather every (role, party_id) pair from the runtime confs saved under the job directory
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'),
                                       recursive=True):
        runtime_conf = file_utils.load_json_conf(os.path.abspath(runtime_conf_path))
        for _role, _party_ids in runtime_conf['role'].items():
            all_party.extend([(_role, _party_id) for _party_id in _party_ids])
    all_party = set(all_party)
    logger.info('start sending stop-job requests to roles: {}'.format(
        ','.join([i[0] for i in all_party])))
    _method = 'DELETE'
    for _role, _party_id in all_party:
        federated_api(job_id=job_id, method=_method,
                      url='/workflow/{}/{}/{}'.format(job_id, _role, _party_id),
                      party_id=_party_id)
    return get_json_result(job_id=job_id)

def stop_workflow(job_id, role, party_id):
    _job_dir = get_job_directory(job_id)
    task_pid_path = os.path.join(_job_dir, 'pids')
    if os.path.isdir(task_pid_path):
        for pid_file in os.listdir(task_pid_path):
            try:
                if not pid_file.endswith('.pid'):
                    continue
                with open(os.path.join(task_pid_path, pid_file), 'r') as f:
                    pids = f.read().split('\n')
                    for pid in pids:
                        try:
                            if len(pid) == 0:
                                continue
                            logger.debug("terminating process pid:{} {}".format(pid, pid_file))
                            p = psutil.Process(int(pid))
                            # kill the whole process tree: children first, then the parent
                            for child in p.children(recursive=True):
                                child.kill()
                            p.kill()
                        except NoSuchProcess:
                            continue
            except Exception:
                logger.exception("error")
                continue
    # report the stopped workflow as failed to the job status endpoint
    federated_api(job_id=job_id, method='POST',
                  url='/job/jobStatus/{}/{}/{}'.format(job_id, role, party_id),
                  party_id=party_id,
                  json_body={'status': 'failed', 'stopJob': True})
    clean_job(job_id=job_id)
    return get_json_result(job_id=job_id)

def internal_server_error(e):
    logger.exception(e)
    return get_json_result(status=100, msg=str(e))

def job_queue_status():
    jobs = show_job_queue()
    return get_json_result(data={job.job_id: job.to_json() for job in jobs})

def query_job(job_id):
    jobs = query_job_by_id(job_id=job_id)
    return get_json_result(job_id=job_id, data=[job.to_json() for job in jobs])

def query_model_version_history():
    history = version_history(data_table_namespace=request.json.get("namespace"))
    return get_json_result(data=history)

def get_runtime_conf(job_id):
    _job_dir = get_job_directory(job_id)
    response_data = []
    # collect the runtime conf of every role/party directory under this job
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'),
                                       recursive=True):
        response_data.append(file_utils.load_json_conf(os.path.abspath(runtime_conf_path)))
    return get_json_result(data=response_data, job_id=job_id)

def do_load_model():
    request_data = request.json
    request_data["servings"] = server_conf.get("servers", {}).get("servings", [])
    publish_model.load_model(config_data=request_data)
    return get_json_result()

def query_model_version_history():
    request_data = request.json
    config = file_utils.load_json_conf(request_data.get("config_path"))
    eggroll.init(mode=WORK_MODE)
    history = version_history(data_table_namespace=config.get("namespace"))
    return get_json_result(msg=json.dumps(history))

def submit_workflow_job():
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    push_into_job_queue(job_id=_job_id, config=_data)
    return get_json_result(job_id=_job_id)

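# A minimal submission sketch, assuming this handler is registered as a POST
# route (route path and port are hypothetical; the body is a full runtime conf
# as consumed later by start_workflow):
#
#   import json
#   import requests
#   with open('examples/runtime_conf.json') as f:
#       conf = json.load(f)
#   resp = requests.post('http://127.0.0.1:9380/job/new', json=conf)
#   print(resp.json())  # contains the generated job id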