def submit_job():
    work_mode = JobRuntimeConfigAdapter(request.json.get('job_runtime_conf', {})).get_job_work_mode()
    detect_utils.check_config({'work_mode': work_mode},
                              required_arguments=[('work_mode', (WorkMode.CLUSTER, WorkMode.STANDALONE))])
    submit_result = DAGScheduler.submit(request.json)
    return get_json_result(retcode=0, retmsg='success',
                           job_id=submit_result.get("job_id"),
                           data=submit_result)
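# Usage sketch (illustrative, not part of the module above): a minimal client call to
# submit_job(). The helper name, route path, port and payload layout are assumptions;
# only the top-level keys mirror the dict handed to DAGScheduler.submit() elsewhere in
# this file ({'job_dsl': ..., 'job_runtime_conf': ...}), and work_mode must be one of
# WorkMode.STANDALONE / WorkMode.CLUSTER for detect_utils.check_config() to pass.
import requests

def example_submit_job(base_url="http://127.0.0.1:9380"):  # hypothetical FATE Flow HTTP address
    payload = {
        "job_dsl": {"components": {}},  # pipeline DSL elided
        "job_runtime_conf": {
            "job_parameters": {"work_mode": 0},  # placement follows the older submit_job variant below
            "role": {"guest": [9999], "host": [10000]},
            "initiator": {"role": "guest", "party_id": 9999},
        },
    }
    # "/v1/job/submit" is an assumed route; check the blueprint registration for the real path.
    response = requests.post(f"{base_url}/v1/job/submit", json=payload)
    return response.json()  # get_json_result(): retcode, retmsg, the job id and data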
def download_upload(access_module):
    job_id = job_utils.generate_job_id()
    if access_module == "upload" and UPLOAD_DATA_FROM_CLIENT and not (request.json and request.json.get("use_local_data") == 0):
        file = request.files['file']
        filename = os.path.join(job_utils.get_job_directory(job_id), 'fate_upload_tmp', file.filename)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        try:
            file.save(filename)
        except Exception as e:
            shutil.rmtree(os.path.join(job_utils.get_job_directory(job_id), 'fate_upload_tmp'))
            raise e
        job_config = request.args.to_dict()
        job_config['file'] = filename
    else:
        job_config = request.json
    required_arguments = ['work_mode', 'namespace', 'table_name']
    if access_module == 'upload':
        required_arguments.extend(['file', 'head', 'partition'])
    elif access_module == 'download':
        required_arguments.extend(['output_path'])
    else:
        raise Exception('can not support this operation: {}'.format(access_module))
    detect_utils.check_config(job_config, required_arguments=required_arguments)
    data = {}
    # compatibility
    if "table_name" in job_config:
        job_config["name"] = job_config["table_name"]
    if "backend" not in job_config:
        job_config["backend"] = 0
    for _ in ["work_mode", "backend", "head", "partition", "drop"]:
        if _ in job_config:
            job_config[_] = int(job_config[_])
    if access_module == "upload":
        if job_config.get('drop', 0) == 1:
            job_config["destroy"] = True
        else:
            job_config["destroy"] = False
        data['table_name'] = job_config["table_name"]
        data['namespace'] = job_config["namespace"]
        data_table_meta = storage.StorageTableMeta(name=job_config["table_name"], namespace=job_config["namespace"])
        if data_table_meta and not job_config["destroy"]:
            return get_json_result(retcode=100,
                                   retmsg='The data table already exists. '
                                          'If you still want to continue uploading, please add the parameter -drop. '
                                          '0 means not to delete and continue uploading, '
                                          '1 means to upload again after deleting the table')
    job_dsl, job_runtime_conf = gen_data_access_job_config(job_config, access_module)
    submit_result = DAGScheduler.submit({'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
    data.update(submit_result)
    return get_json_result(job_id=job_id, data=data)
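# Illustrative request bodies for download_upload(). The field sets follow
# required_arguments above; all concrete values (paths, table names, partition count)
# are placeholders, not defaults of the service.
example_upload_config = {
    "work_mode": 0,
    "namespace": "experiment",
    "table_name": "breast_hetero_guest",
    "file": "/data/projects/fate/examples/data/breast_hetero_guest.csv",  # server-side path (JSON body with use_local_data == 0)
    "head": 1,        # 1: first line of the file is a header
    "partition": 4,
    "drop": 1,        # per the handler: 1 destroys an existing table with the same name/namespace before uploading
}
example_download_config = {
    "work_mode": 0,
    "namespace": "experiment",
    "table_name": "breast_hetero_guest",
    "output_path": "/data/projects/fate/temp/breast_hetero_guest.csv",
}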
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operation now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)
                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if the party id is valid.".format(
                        request_config["party_id"]))
                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(MLModel.f_job_id == job_parameters.get("model_version"),
                                                       MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        model_info['role'] = request_config["model_id"].split('#')[0]
                        model_info['party_id'] = request_config["model_id"].split('#')[1]
                        if model_utils.compare_version(model_info['f_fate_version'], '1.5.1') == 'lt':
                            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
                            model_info['initiator_role'] = model_info.get('f_train_runtime_conf', {}).get(
                                'initiator', {}).get('role')
                            model_info['initiator_party_id'] = model_info.get('f_train_runtime_conf', {}).get(
                                'initiator', {}).get('party_id')
                            model_info['work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = False if model_info.get('f_inference_dsl') else True
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(f'job id: {job_parameters.get("model_version")}, '
                                         f'role: {request_config["role"]} model info already exists in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        submit_result = DAGScheduler.submit({'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)
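# Client-side sketch for the EXPORT / IMPORT branches above. The helper names, route
# paths, port and example field values are hypothetical; the four required fields come
# from required_arguments, and import sends the archive as a multipart "file" to match
# request.files.get('file') and request.form.to_dict() in the handler.
import requests

def example_export_model(base_url="http://127.0.0.1:9380"):
    body = {"model_id": "arbiter-10000#guest-9999#host-10000#model",   # placeholder
            "model_version": "202101011200001234567",                 # placeholder
            "role": "guest",
            "party_id": "9999"}
    # Export returns the packaged archive via send_file(); stream it to disk.
    resp = requests.post(f"{base_url}/v1/model/export", json=body, stream=True)  # assumed route
    with open("model_export.zip", "wb") as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)

def example_import_model(archive_path, base_url="http://127.0.0.1:9380"):
    body = {"model_id": "arbiter-10000#guest-9999#host-10000#model",   # placeholder
            "model_version": "202101011200001234567",                 # placeholder
            "role": "guest",
            "party_id": "9999"}
    with open(archive_path, "rb") as f:
        resp = requests.post(f"{base_url}/v1/model/import", data=body, files={"file": f})  # assumed route
    return resp.json()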
init_arch_db()
# init runtime config
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--standalone_node', default=False, help="run in standalone node mode", action='store_true')
args = parser.parse_args()
RuntimeConfig.init_env()
RuntimeConfig.set_process_role(ProcessRole.DRIVER)
PrivilegeAuth.init()
ServiceUtils.register()
ResourceManager.initialize()
Detector(interval=5 * 1000).start()
DAGScheduler(interval=2 * 1000).start()
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10),
                     options=[(cygrpc.ChannelArgKey.max_send_message_length, -1),
                              (cygrpc.ChannelArgKey.max_receive_message_length, -1)])
proxy_pb2_grpc.add_DataTransferServiceServicer_to_server(UnaryService(), server)
server.add_insecure_port("{}:{}".format(IP, GRPC_PORT))
server.start()
# start http server
try:
    run_simple(hostname=IP, port=HTTP_PORT, application=app, threaded=True)
    stat_logger.info("FATE Flow server started successfully")
except OSError as e:
    traceback.print_exc()
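# Launch sketch (this block appears to be the server's main entry point):
#   python fate_flow_server.py                    # cluster deployment
#   python fate_flow_server.py --standalone_node  # standalone / single-node deployment
# Startup order above: DB init -> runtime config, privilege auth and service registration
# -> resource manager -> Detector and DAGScheduler daemons -> gRPC transfer service ->
# blocking Werkzeug HTTP server. Note that run_simple() blocks, so the success log line
# is only emitted once the HTTP server exits; IP, GRPC_PORT and HTTP_PORT come from the
# service settings imported elsewhere in this module.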
def submit_job():
    work_mode = request.json.get('job_runtime_conf', {}).get('job_parameters', {}).get('work_mode', None)
    detect_utils.check_config({'work_mode': work_mode},
                              required_arguments=[('work_mode', (WorkMode.CLUSTER, WorkMode.STANDALONE))])
    job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = DAGScheduler.submit(
        request.json)
    return get_json_result(retcode=0, retmsg='success', job_id=job_id,
                           data={'job_dsl_path': job_dsl_path,
                                 'job_runtime_conf_path': job_runtime_conf_path,
                                 'model_info': model_info,
                                 'board_url': board_url,
                                 'logs_directory': logs_directory})
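# Shape of a successful response from this older submit_job() variant, inferred from the
# get_json_result() call above. Values and the exact name of the job id field depend on
# get_json_result(); everything shown here is a placeholder.
# {
#     "retcode": 0,
#     "retmsg": "success",
#     "jobId": "<job id>",
#     "data": {
#         "job_dsl_path": "<job directory>/job_dsl.json",
#         "job_runtime_conf_path": "<job directory>/job_runtime_conf.json",
#         "model_info": {"model_id": "<model id>", "model_version": "<model version>"},
#         "board_url": "<fateboard dashboard url>",
#         "logs_directory": "<logs directory>"
#     }
# }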
def rerun_job(job_id, role, party_id):
    DAGScheduler.rerun_job(job_id=job_id, initiator_role=role, initiator_party_id=party_id,
                           component_name=request.json.get("component_name"))
    return get_json_result(retcode=0, retmsg='success')
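# Client sketch for rerun_job(): job_id, role and party_id are parameters of the handler
# signature (path parameters in the route), and only component_name travels in the JSON
# body; it may be omitted, in which case the handler passes None. The route template is
# an assumption.
# requests.post(f"{base_url}/v1/job/rerun/{job_id}/{role}/{party_id}",
#               json={"component_name": "hetero_lr_0"})  # component name is a placeholder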
def stop_job(job_id, role, party_id, stop_status):
    retcode, retmsg = DAGScheduler.stop_job(job_id=job_id, role=role, party_id=party_id, stop_status=stop_status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
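# Client sketch for stop_job(): all four arguments arrive through the handler signature
# (path parameters) and no body is required. Both the route template and the concrete
# stop_status values accepted by DAGScheduler.stop_job() are assumptions here.
# requests.post(f"{base_url}/v1/job/stop/{job_id}/{role}/{party_id}/{stop_status}")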
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operation now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)
                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if the party id is valid.".format(
                        request_config["party_id"]))
                try:
                    with DB.connection_context():
                        model = MLModel.get_or_none(
                            MLModel.f_job_id == train_runtime_conf["job_parameters"]["model_version"],
                            MLModel.f_role == request_config["role"])
                        if not model:
                            MLModel.create(
                                f_role=request_config["role"],
                                f_party_id=request_config["party_id"],
                                f_roles=train_runtime_conf["role"],
                                f_job_id=train_runtime_conf["job_parameters"]["model_version"],
                                f_model_id=train_runtime_conf["job_parameters"]["model_id"],
                                f_model_version=train_runtime_conf["job_parameters"]["model_version"],
                                f_initiator_role=train_runtime_conf["initiator"]["role"],
                                f_initiator_party_id=train_runtime_conf["initiator"]["party_id"],
                                f_runtime_conf=train_runtime_conf,
                                f_work_mode=train_runtime_conf["job_parameters"]["work_mode"],
                                f_dsl=json_loads(pipeline.train_dsl),
                                f_imported=1,
                                f_job_status='complete')
                        else:
                            stat_logger.info(f'job id: {train_runtime_conf["job_parameters"]["model_version"]}, '
                                             f'role: {request_config["role"]} model info already exists in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = DAGScheduler.submit(
            {'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update({'job_dsl_path': job_dsl_path,
                     'job_runtime_conf_path': job_runtime_conf_path,
                     'board_url': board_url,
                     'logs_directory': logs_directory})
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)
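# Follow-up sketch (hypothetical helper, not part of the handler above): verify that an
# imported model was registered, reusing the same peewee query pattern as the IMPORT
# branch. The default model_version and role values are placeholders; the field names
# come from the MLModel.create() call above.
def example_check_imported_model(model_version="202101011200001234567", role="guest"):
    with DB.connection_context():
        record = MLModel.get_or_none(MLModel.f_job_id == model_version,
                                     MLModel.f_role == role)
    if record:
        stat_logger.info(f"model {record.f_model_id} ({record.f_model_version}) "
                         f"already registered with status {record.f_job_status}")
    return record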