def output_model_reload(cls, job, source_job):
    """Reload inherited component models into the model of the current job.

    The party model id of both jobs is derived from the ``model_id`` stored
    under ``job_parameters.common`` in their runtime conf, always combined
    with the role and party id of ``job``. The components listed in
    ``job.f_inheritance_info`` are then reloaded from the source model into
    the model whose version is ``job.f_job_id``.

    :param cls: not used by the body.
    :param job: job record that receives the component models.
    :param source_job: job record whose runtime conf names the source model.
    """

    def party_model_id_of(job_record):
        # Both ids use the role/party of the *current* job, not of job_record.
        common_parameters = job_record.f_runtime_conf.get("job_parameters").get("common")
        return model_utils.gen_party_model_id(
            common_parameters.get("model_id"), job.f_role, job.f_party_id)

    source_model_id = party_model_id_of(source_job)
    target_model_id = party_model_id_of(job)

    target_model = PipelinedModel(model_id=target_model_id,
                                  model_version=job.f_job_id)
    target_model.reload_component_model(
        model_id=source_model_id,
        model_version=job.f_inheritance_info.get("job_id"),
        component_list=job.f_inheritance_info.get("component_list"),
    )
def operate_model(model_operation):
    """Handle a model store / restore / export / import request.

    * IMPORT saves the uploaded archive under ``TEMP_DIRECTORY`` and unpacks
      it into the local pipelined model.
    * EXPORT packages the local pipelined model and sends it as an attachment.
    * STORE / RESTORE are submitted as jobs through ``JobController``.

    :param model_operation: one of the ``ModelOperation`` values listed above.
    :return: the HTTP response for the request.
    :raises Exception: for an unsupported operation, a missing upload, or an
        upload file name that resolves outside ``TEMP_DIRECTORY``.
    """
    request_config = request.json or request.form.to_dict()
    job_id = generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    # From here on "model_id" holds the party-specific model id.
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])

    if model_operation == ModelOperation.IMPORT:
        file = request.files.get('file')
        if file is None or not file.filename:
            raise Exception("No model file was uploaded, please provide it in the 'file' field.")
        file_path = os.path.join(TEMP_DIRECTORY, file.filename)
        # The file name comes from the client: refuse names that resolve
        # outside the temp directory (absolute paths, "..").
        temp_root = os.path.join(os.path.abspath(TEMP_DIRECTORY), '')
        if not os.path.abspath(file_path).startswith(temp_root):
            raise Exception('Invalid model file name: {}'.format(file.filename))
        try:
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            file.save(file_path)
        except Exception:
            # file_path is a regular file, so shutil.rmtree would raise and
            # hide the original error; remove the partial upload instead.
            try:
                os.remove(file_path)
            except OSError:
                pass  # nothing was written, or it is already gone
            raise
        request_config['file'] = file_path
        model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                               model_version=request_config["model_version"])
        model.unpack_model(file_path)
        return get_json_result()

    if model_operation == ModelOperation.EXPORT:
        model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                               model_version=request_config["model_version"])
        archive_file_path = model.packaging_model()
        return send_file(archive_file_path,
                         attachment_filename=os.path.basename(archive_file_path),
                         as_attachment=True)

    # STORE / RESTORE run as jobs.
    data = {}
    job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
    job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = JobController.submit_job(
        {'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
    data.update({'job_dsl_path': job_dsl_path,
                 'job_runtime_conf_path': job_runtime_conf_path,
                 'board_url': board_url,
                 'logs_directory': logs_directory})
    return get_json_result(job_id=job_id, data=data)
def publish_online(config_data):
    """Send a ``publishOnline`` request to every configured serving address.

    :param config_data: dict providing ``initiator`` (``role``, ``party_id``),
        ``job_parameters`` (``model_id``, ``model_version``), ``role`` and
        ``servings``.
    :return: ``True`` when every response had ``statusCode`` 0, else ``False``.
    """
    initiator = config_data['initiator']
    initiator_role = initiator['role']
    initiator_party_id = initiator['party_id']
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    def build_request():
        # A fresh request is built for every serving, as the roles are read
        # from config_data each time.
        message = model_service_pb2.PublishRequest()
        for role_name, party_ids in config_data.get("role").items():
            message.role[role_name].partyId.extend(party_ids)
        message.model[initiator_role].roleModelInfo[initiator_party_id].tableName = model_version
        message.model[initiator_role].roleModelInfo[initiator_party_id].namespace = \
            model_utils.gen_party_model_id(model_id, initiator_role, initiator_party_id)
        message.local.role = initiator_role
        message.local.partyId = initiator_party_id
        return message

    all_published = True
    for serving_address in config_data.get('servings'):
        with grpc.insecure_channel(serving_address) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            publish_model_request = build_request()
            stat_logger.info(publish_model_request)
            response = stub.publishOnline(publish_model_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                # Keep going: the remaining servings are still contacted.
                all_published = False
    return all_published
def __init__(self, job_id: str, role: str, party_id: int,
             model_id: str = None, model_version: str = None,
             component_name: str = None, component_module_name: str = None,
             task_id: str = None):
    """Set up tracking identifiers, table namespaces and the pipelined model.

    :param job_id: job identifier, part of both table namespaces.
    :param role: party role.
    :param party_id: party identifier.
    :param model_id: base model id, combined with role/party into
        ``party_model_id``.
    :param model_version: model version; a pipelined model is only created
        when both this and ``party_model_id`` are truthy.
    :param component_name: defaults to ``'pipeline'`` when falsy.
    :param component_module_name: defaults to ``'Pipeline'`` when falsy.
    :param task_id: generated from job id and component name when falsy.
    """
    self.job_id = job_id
    self.role = role
    self.party_id = party_id
    self.component_name = component_name or 'pipeline'
    self.module_name = component_module_name or 'Pipeline'
    self.task_id = task_id or job_utils.generate_task_id(
        job_id=self.job_id, component_name=self.component_name)

    # The component namespace is the job namespace plus the component name.
    namespace_parts = ['fate_flow', 'tracking', 'data',
                       self.job_id, self.role, str(self.party_id)]
    self.table_namespace = '_'.join(namespace_parts + [self.component_name])
    self.job_table_namespace = '_'.join(namespace_parts)

    self.model_id = model_id
    self.party_model_id = model_utils.gen_party_model_id(
        model_id=model_id, role=role, party_id=party_id)
    self.model_version = model_version
    if self.party_model_id and self.model_version:
        self.pipelined_model = pipelined_model.PipelinedModel(
            model_id=self.party_model_id, model_version=self.model_version)
    else:
        self.pipelined_model = None
def do_load_model():
    """Load a model onto the servings and record the outcome.

    After ``publish_model.load_model`` returns, two best-effort steps follow,
    each logging (not raising) its own failure: bumping ``f_loaded_times`` of
    the matching ``MLModel`` row when loading succeeded, and copying the local
    model cache to ``loaded_model_backup`` unless a backup already exists.

    :return: JSON result carrying the load ``retcode`` and ``retmsg``.
    """
    request_data = request.json
    adapter_servings_config(request_data)
    retcode, retmsg = publish_model.load_model(config_data=request_data)

    # Step 1: count the successful load in the database.
    try:
        if not retcode:
            with DB.connection_context():
                model = MLModel.get_or_none(
                    MLModel.f_role == request_data.get("local").get("role"),
                    MLModel.f_party_id == request_data.get("local").get("party_id"),
                    MLModel.f_model_id == request_data.get("job_parameters").get("model_id"),
                    MLModel.f_model_version == request_data.get("job_parameters").get("model_version"))
                if model:
                    model.f_loaded_times += 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    # Step 2: keep a backup copy of the loaded model (runs whatever the retcode).
    try:
        party_model_id = gen_party_model_id(
            role=request_data.get("local").get("role"),
            party_id=request_data.get("local").get("party_id"),
            model_id=request_data.get("job_parameters").get("model_id"))

        def versioned_model_dir(top_level_dir):
            return os.path.join(file_utils.get_project_base_directory(),
                                top_level_dir,
                                party_model_id,
                                request_data.get("job_parameters").get("model_version"))

        src_model_path = versioned_model_dir('model_local_cache')
        dst_model_path = versioned_model_dir('loaded_model_backup')
        if not os.path.exists(dst_model_path):
            shutil.copytree(src=src_model_path, dst=dst_model_path)
    except Exception as copy_err:
        stat_logger.exception(copy_err)

    load_status = "success" if not retcode else "failed"
    operation_record(request_data, "load", load_status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
def bind_model_service(config_data):
    """Send a ``publishBind`` request for a service id to every serving address.

    :param config_data: dict providing ``service_id``, ``initiator``
        (``role``, ``party_id``), ``job_parameters`` (``model_id``,
        ``model_version``), ``role`` and ``servings``.
    :return: ``(0, None)`` on success, ``(100, message)`` when no serving
        address is configured, or the ``(statusCode, message)`` of the first
        serving that answered with a non-zero status.
    """
    service_id = config_data.get('service_id')
    initiator = config_data['initiator']
    initiator_role = initiator['role']
    initiator_party_id = initiator['party_id']
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    if not config_data.get('servings'):
        return 100, 'Please configure servings address'

    def build_request():
        message = model_service_pb2.PublishRequest()
        message.serviceId = service_id
        for role_name, party_ids in config_data.get("role").items():
            message.role[role_name].partyId.extend(party_ids)
        message.model[initiator_role].roleModelInfo[initiator_party_id].tableName = model_version
        message.model[initiator_role].roleModelInfo[initiator_party_id].namespace = \
            model_utils.gen_party_model_id(model_id, initiator_role, initiator_party_id)
        message.local.role = initiator_role
        message.local.partyId = initiator_party_id
        return message

    for serving_address in config_data.get('servings'):
        with grpc.insecure_channel(serving_address) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            publish_model_request = build_request()
            stat_logger.info(publish_model_request)
            response = stub.publishBind(publish_model_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                # Stop at the first failure; later servings are not contacted.
                return response.statusCode, response.message
    return 0, None
def __init__(self, job_id: str, role: str, party_id: int,
             model_id: str = None, model_version: str = None,
             component_name: str = None, component_module_name: str = None,
             task_id: str = None, task_version: int = None,
             job_parameters: RunParameters = None):
    """Store job/task identifiers and open the pipelined model when possible.

    :param job_id: job identifier.
    :param role: party role.
    :param party_id: party identifier.
    :param model_id: base model id, combined with role/party into
        ``party_model_id``.
    :param model_version: model version; a pipelined model is only created
        when both this and ``party_model_id`` are truthy.
    :param component_name: falls back to
        ``job_utils.job_virtual_component_name()`` when falsy.
    :param component_module_name: falls back to
        ``job_utils.job_virtual_component_module_name()`` when falsy.
    :param task_id: stored as given.
    :param task_version: stored as given.
    :param job_parameters: stored as given.
    """
    self.job_id = job_id
    self.role = role
    self.party_id = party_id

    self.model_id = model_id
    self.party_model_id = model_utils.gen_party_model_id(
        model_id=model_id, role=role, party_id=party_id)
    self.model_version = model_version
    if self.party_model_id and self.model_version:
        self.pipelined_model = pipelined_model.PipelinedModel(
            model_id=self.party_model_id, model_version=self.model_version)
    else:
        self.pipelined_model = None

    self.component_name = component_name or job_utils.job_virtual_component_name()
    self.module_name = component_module_name or job_utils.job_virtual_component_module_name()
    self.task_id = task_id
    self.task_version = task_version
    self.job_parameters = job_parameters
def do_load_model():
    """Load a deployed model onto the servings and record the outcome.

    When ``enable_model_store`` is set, the local model cache and the model
    storage are synchronised first: whichever side is missing the model gets
    it from the other. Models that are not deployed are rejected with
    retcode 100. After a successful load, ``f_loaded_times`` of the matching
    ``MLModel`` row is incremented; a failure there is only logged.

    :return: JSON result carrying the resulting ``retcode`` and ``retmsg``.
    """
    request_data = request.json
    request_data['servings'] = RuntimeConfig.SERVICE_DB.get_urls('servings')

    local_info = request_data['local']
    role = local_info['role']
    party_id = local_info['party_id']
    job_parameters = request_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']
    party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)

    if get_base_config('enable_model_store', False):
        pipeline_model = pipelined_model.PipelinedModel(party_model_id, model_version)
        component_parameters = {
            'model_id': party_model_id,
            'model_version': model_version,
            'store_address': ServiceRegistry.MODEL_STORE_ADDRESS,
        }
        model_storage = get_model_storage(component_parameters)

        cached_locally = pipeline_model.exists()
        stored_remotely = model_storage.exists(**component_parameters)
        if cached_locally and not stored_remotely:
            stat_logger.info(
                f'Uploading {pipeline_model.model_path} to model storage.')
            model_storage.store(**component_parameters)
        elif stored_remotely and not cached_locally:
            stat_logger.info(
                f'Downloading {pipeline_model.model_path} from model storage.')
            model_storage.restore(**component_parameters)

    deployed = model_utils.check_if_deployed(role, party_id, model_id, model_version)
    if not deployed:
        return get_json_result(
            retcode=100,
            retmsg="Only deployed models could be used to execute process of loading. "
                   "Please deploy model before loading.")

    retcode, retmsg = publish_model.load_model(request_data)

    # Best effort: a failure to update the counter must not fail the request.
    try:
        if not retcode:
            with DB.connection_context():
                model = MLModel.get_or_none(
                    MLModel.f_role == request_data["local"]["role"],
                    MLModel.f_party_id == request_data["local"]["party_id"],
                    MLModel.f_model_id == request_data["job_parameters"]["model_id"],
                    MLModel.f_model_version == request_data["job_parameters"]["model_version"])
                if model:
                    model.f_loaded_times = model.f_loaded_times + 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    load_status = "success" if not retcode else "failed"
    operation_record(request_data, "load", load_status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
def generate_publish_model_info(config_data):
    """Fill ``config_data['model']`` with per-role, per-party model info.

    For every party listed under ``config_data['role']`` the entry holds the
    party model id (from the job's ``model_id``) and the job's
    ``model_version``. ``config_data`` is modified in place; nothing is
    returned.

    :param config_data: dict with ``job_parameters`` (``model_id``,
        ``model_version``) and ``role`` (role name -> party ids).
    """
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    publish_info = config_data['model'] = {}
    for role, party_ids in config_data.get("role").items():
        party_entries = publish_info[role] = {}
        for party_id in party_ids:
            party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)
            party_entries[party_id] = {
                'model_id': party_model_id,
                'model_version': model_version,
            }
def deploy_homo_model(request_data):
    """Deploy an already converted homogeneous component model as a service.

    :param request_data: dict with ``model_id``, ``role``, ``party_id``,
        ``model_version``, ``component_name``, ``service_id``,
        ``deployment_type`` and ``deployment_parameters``; ``framework_name``
        is optional and derived from the component model when absent.
    :return: ``(retcode, message, deployed_service)``. ``retcode`` is 100 and
        the last item ``None`` when the model, the component, its output
        model or its converted form is missing; otherwise 0.
    """
    party_model_id = model_utils.gen_party_model_id(
        model_id=request_data["model_id"],
        role=request_data["role"],
        party_id=request_data["party_id"])
    model_version = request_data["model_version"]
    component_name = request_data['component_name']
    service_id = request_data['service_id']
    framework_name = request_data.get('framework_name')

    model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                           model_version=model_version)
    if not model.exists():
        message = 'Model {} {} does not exist'.format(party_model_id, model_version)
        return 100, message, None

    # The model alias comes from the train dsl saved with the pipeline.
    pipeline = model.read_pipeline_model()
    train_dsl = json_loads(pipeline.train_dsl)
    components = train_dsl.get('components', {})
    if component_name not in components:
        message = 'Model {} {} does not contain component {}'.format(
            party_model_id, model_version, component_name)
        return 100, message, None
    component_dsl = components[component_name]

    model_alias_list = component_dsl.get('output', {}).get('model')
    if not model_alias_list:
        message = 'Component {} in Model {} {} does not have output model'.format(
            component_name, party_model_id, model_version)
        return 100, message, None

    # Currently a component has a single output model.
    model_alias = model_alias_list[0]
    converted_model_dir = os.path.join(model.variables_data_path,
                                       component_name, model_alias,
                                       "converted_model")
    if not os.path.isdir(converted_model_dir):
        message = "Component {} in Model {} {} isn't converted".format(
            component_name, party_model_id, model_version)
        return 100, message, None

    # todo: use subprocess?
    convert_tool = model.get_homo_model_convert_tool()
    if not framework_name:
        # No framework requested: ask the convert tool for the default one.
        module_name = component_dsl.get('module')
        buffer_obj = model.read_component_model(component_name, model_alias)
        framework_name = convert_tool.get_default_target_framework(
            model_contents=buffer_obj, module_name=module_name)

    model_object = convert_tool.load_converted_model(
        base_dir=converted_model_dir, framework_name=framework_name)
    deployed_service = model_deploy(party_model_id,
                                    model_version,
                                    model_object,
                                    framework_name,
                                    service_id,
                                    request_data['deployment_type'],
                                    request_data['deployment_parameters'])
    message = f"An online serving service is started in the {request_data['deployment_type']} system."
    return 0, message, deployed_service
def convert_homo_model(request_data):
    """Convert the homogeneous component models of a model to a framework format.

    Every component model listed under ``model_proto`` in the model's define
    meta (except the ``pipeline`` entry) is passed to the convert tool; each
    successful conversion is saved in a ``converted_model`` directory next to
    the component model.

    :param request_data: dict with ``model_id``, ``role`` and ``party_id``;
        ``model_version`` and ``framework_name`` are read with ``.get``.
    :return: ``(0, message, detail)`` when at least one component was
        converted, where ``detail`` lists component name, model alias and
        saved path; ``(100, message, None)`` when the model does not exist or
        nothing could be converted.
    """
    party_model_id = model_utils.gen_party_model_id(
        model_id=request_data["model_id"],
        role=request_data["role"],
        party_id=request_data["party_id"])
    model_version = request_data.get("model_version")

    model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                           model_version=model_version)
    if not model.exists():
        return 100, 'Model {} {} does not exist'.format(
            party_model_id, model_version), None

    with open(model.define_meta_path, "r", encoding="utf-8") as fr:
        # safe_load returns None for an empty file; treat it as "no components".
        define_index = yaml.safe_load(fr) or {}

    framework_name = request_data.get("framework_name")
    component_define = define_index.get("component_define", {})
    detail = []
    # todo: use subprocess?
    convert_tool = model.get_homo_model_convert_tool()
    for component_name, model_aliases in define_index.get("model_proto", {}).items():
        if component_name == 'pipeline':
            continue
        module_name = component_define.get(component_name, {}).get('module_name')
        for model_alias in model_aliases:
            buffer_obj = model.read_component_model(component_name, model_alias)
            converted_framework, converted_model = convert_tool.model_convert(
                model_contents=buffer_obj,
                module_name=module_name,
                framework_name=framework_name)
            if not converted_model:
                continue
            converted_model_dir = os.path.join(model.variables_data_path,
                                               component_name, model_alias,
                                               "converted_model")
            os.makedirs(converted_model_dir, exist_ok=True)
            saved_path = convert_tool.save_converted_model(
                converted_model, converted_framework, converted_model_dir)
            detail.append({
                "component_name": component_name,
                "model_alias": model_alias,
                "converted_model_path": saved_path,
            })

    if detail:
        return (
            0,
            f"Conversion of homogeneous federated learning component(s) in model "
            f"{party_model_id}:{model_version} completed. Use export or homo/deploy "
            f"to download or deploy the converted model.",
            detail)
    return 100, f"No component in model {party_model_id}:{model_version} can be converted.", None
def read_component_model(self):
    """Read this loader's component model and record it as a metric.

    The model is read from the party-specific pipelined model, stored in
    ``self.model_output`` and described by a ``model_loader`` metric meta on
    the tracker.

    :raises ValueError: when the component model read is empty.
    """
    party_model_id = gen_party_model_id(self.model_id,
                                        self.tracker.role,
                                        self.tracker.party_id)
    source_model = PipelinedModel(party_model_id, self.model_version)

    loaded_model = source_model._read_component_model(self.component_name,
                                                      self.model_alias)
    if not loaded_model:
        raise ValueError('The component model is empty.')
    self.model_output = loaded_model

    model_description = {
        'model_id': self.model_id,
        'model_version': self.model_version,
        'component_name': self.component_name,
        'model_alias': self.model_alias,
    }
    self.tracker.set_metric_meta(
        'model_loader',
        f'{self.component_name}-{self.model_alias}',
        MetricMeta('component_model', 'component_model_info', model_description))
def __init__(
    self,
    job_id: str = None,
    role: str = None,
    party_id: int = None,
    model_id: str = None,
    model_version: str = None,
    component_name: str = None,
    component_module_name: str = None,
    task_id: str = None,
    task_version: int = None,
    job_parameters: RunParameters = None,
    max_to_keep: int = None,
    mkdir: bool = True,
):
    """Set up the checkpoint directory and the bounded checkpoint queue.

    :param job_id: stored as given.
    :param role: party role, part of the party model id.
    :param party_id: party identifier, part of the party model id.
    :param model_id: base model id, part of the party model id.
    :param model_version: used as a path component of the checkpoint
        directory. NOTE(review): the default ``None`` cannot be joined into
        a path, so callers apparently always pass it -- confirm.
    :param component_name: defaults to ``'pipeline'`` when falsy.
    :param component_module_name: defaults to ``'Pipeline'`` when falsy.
    :param task_id: stored as given.
    :param task_version: stored as given.
    :param job_parameters: stored as given.
    :param max_to_keep: maximum number of checkpoints kept in
        ``self.checkpoints``; ``None`` means unbounded.
    :param mkdir: create the checkpoint directory when true.
    :raises ValueError: when ``max_to_keep`` is an integer <= 0.
    :raises TypeError: when ``max_to_keep`` is neither an integer nor None.
    """
    # Validate first so that a rejected call leaves no directory behind.
    if isinstance(max_to_keep, int):
        if max_to_keep <= 0:
            raise ValueError('max_to_keep must be positive')
    elif max_to_keep is not None:
        raise TypeError('max_to_keep must be an integer')

    self.job_id = job_id
    self.role = role
    self.party_id = party_id
    self.model_id = model_id
    self.model_version = model_version
    self.party_model_id = gen_party_model_id(self.model_id, self.role, self.party_id)
    self.component_name = component_name if component_name else 'pipeline'
    self.module_name = component_module_name if component_module_name else 'Pipeline'
    self.task_id = task_id
    self.task_version = task_version
    self.job_parameters = job_parameters
    self.mkdir = mkdir

    self.directory = (Path(get_fate_flow_directory()) / 'model_local_cache' /
                      self.party_model_id / model_version / 'checkpoint' /
                      self.component_name)
    if self.mkdir:
        self.directory.mkdir(mode=0o755, parents=True, exist_ok=True)

    self.checkpoints = deque(maxlen=max_to_keep)
def operate_model(model_operation):
    """Handle a model store / restore / export / import request.

    * IMPORT saves the uploaded archive under ``TEMP_DIRECTORY``, unpacks it,
      checks that the requesting party belongs to the model's roles and
      registers the model in the database when it is not there yet.
    * EXPORT packages the local model and sends it as an attachment, or
      answers with response code 210 when the model is missing or packaging
      fails.
    * STORE / RESTORE are submitted as jobs through ``DAGScheduler``.

    Every import/export outcome is written with ``operation_record``.

    :param model_operation: one of the ``ModelOperation`` values listed above.
    :return: the HTTP response for the request.
    :raises Exception: for an unsupported operation, or when an import fails
        (missing upload, invalid file name, party not in the model roles, ...).
    """
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [
            ModelOperation.STORE, ModelOperation.RESTORE,
            ModelOperation.EXPORT, ModelOperation.IMPORT
    ]:
        raise Exception(
            'Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    # From here on "model_id" holds the party-specific model id.
    request_config["model_id"] = gen_party_model_id(
        model_id=request_config["model_id"],
        role=request_config["role"],
        party_id=request_config["party_id"])

    if model_operation == ModelOperation.IMPORT:
        try:
            file = request.files.get('file')
            if file is None or not file.filename:
                raise Exception(
                    "No model file was uploaded, please provide it in the 'file' field.")
            file_path = os.path.join(TEMP_DIRECTORY, file.filename)
            # The file name comes from the client: refuse names that resolve
            # outside the temp directory (absolute paths, "..").
            temp_root = os.path.join(os.path.abspath(TEMP_DIRECTORY), '')
            if not os.path.abspath(file_path).startswith(temp_root):
                raise Exception('Invalid model file name: {}'.format(file.filename))
            try:
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                file.save(file_path)
            except Exception:
                # file_path is a regular file, so shutil.rmtree would raise
                # and hide the original error; remove the partial upload.
                try:
                    os.remove(file_path)
                except OSError:
                    pass  # nothing was written, or it is already gone
                raise
            request_config['file'] = file_path

            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            model.unpack_model(file_path)

            pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
            train_runtime_conf = json_loads(pipeline.train_runtime_conf)
            # Party ids may arrive as int or str, so accept both spellings.
            permitted_party_id = []
            for party_ids in train_runtime_conf.get('role', {}).values():
                for party_id in party_ids:
                    permitted_party_id.extend([party_id, str(party_id)])
            if request_config["party_id"] not in permitted_party_id:
                shutil.rmtree(model.model_path)
                raise Exception(
                    "party id {} is not in model roles, please check if the party id is valid."
                    .format(request_config["party_id"]))

            try:
                adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                job_parameters = adapter.get_common_parameters().to_dict()
                with DB.connection_context():
                    db_model = MLModel.get_or_none(
                        MLModel.f_job_id == job_parameters.get("model_version"),
                        MLModel.f_role == request_config["role"])
                if not db_model:
                    model_info = model_utils.gather_model_info_data(model)
                    model_info['imported'] = 1
                    model_info['job_id'] = model_info['f_model_version']
                    model_info['size'] = model.calculate_model_file_size()
                    # The party model id starts with "<role>#<party_id>#".
                    model_info['role'] = request_config["model_id"].split('#')[0]
                    model_info['party_id'] = request_config["model_id"].split('#')[1]
                    if model_utils.compare_version(
                            model_info['f_fate_version'], '1.5.1') == 'lt':
                        model_info['roles'] = model_info.get(
                            'f_train_runtime_conf', {}).get('role', {})
                        model_info['initiator_role'] = model_info.get(
                            'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                        model_info['initiator_party_id'] = model_info.get(
                            'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                        model_info['work_mode'] = adapter.get_job_work_mode()
                        model_info['parent'] = False if model_info.get(
                            'f_inference_dsl') else True
                    model_utils.save_model_info(model_info)
                else:
                    stat_logger.info(
                        f'job id: {job_parameters.get("model_version")}, '
                        f'role: {request_config["role"]} model info already existed in database.'
                    )
            except peewee.IntegrityError as e:
                # A duplicate row is logged and does not fail the import.
                stat_logger.exception(e)
            operation_record(request_config, "import", "success")
            return get_json_result()
        except Exception:
            operation_record(request_config, "import", "failed")
            raise

    if model_operation == ModelOperation.EXPORT:
        try:
            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            if model.exists():
                archive_file_path = model.packaging_model()
                operation_record(request_config, "export", "success")
                return send_file(archive_file_path,
                                 attachment_filename=os.path.basename(
                                     archive_file_path),
                                 as_attachment=True)
            operation_record(request_config, "export", "failed")
            return error_response(
                response_code=210,
                retmsg="Model {} {} is not exist.".format(
                    request_config.get("model_id"),
                    request_config.get("model_version")))
        except Exception as e:
            operation_record(request_config, "export", "failed")
            stat_logger.exception(e)
            return error_response(response_code=210, retmsg=str(e))

    # STORE / RESTORE run as jobs.
    data = {}
    job_dsl, job_runtime_conf = gen_model_operation_job_config(
        request_config, model_operation)
    submit_result = DAGScheduler.submit(
        {
            'job_dsl': job_dsl,
            'job_runtime_conf': job_runtime_conf
        },
        job_id=job_id)
    data.update(submit_result)
    operation_record(data=job_runtime_conf,
                     oper_type=model_operation,
                     oper_status='')
    return get_json_result(job_id=job_id, data=data)
def deploy(config_data):
    """Create a deployable child model from a trained parent model.

    The parent model directory is copied (without its top-level
    ``checkpoint`` entry) to a new model version, the pipeline of the copy is
    rewritten with the inference dsl and the new version, the model info is
    saved, and the checkpoints requested in ``components_checkpoint`` are
    deployed into the child model.

    :param config_data: dict with ``model_id``, ``model_version``, ``local``
        (``role``, ``party_id``) and ``child_model_version``; optional keys
        are ``components_checkpoint``, ``dsl`` / ``predict_dsl`` and
        ``cpn_list`` (which is popped from the dict when used).
    :return: ``(0, message)`` on success, with any dsl conversion warning
        appended to the message; ``(100, message)`` when any step raised.
    """
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    components_checkpoint = config_data.get('components_checkpoint', {})
    warning_msg = ""
    try:
        party_model_id = gen_party_model_id(model_id=model_id,
                                            role=local_role,
                                            party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")
        # check whether this model may be deployed (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')
        # copy proto content from parent model and generate a child model;
        # only the 'checkpoint' entry at the top of the parent directory is
        # skipped by the ignore callback
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path,
                        dst=deploy_model.model_path,
                        ignore=lambda src, names: {'checkpoint'}
                        if src == model.model_path else {})
        pipeline_model = deploy_model.read_pipeline_model()
        train_runtime_conf = json_loads(pipeline_model.train_runtime_conf)
        runtime_conf_on_party = json_loads(
            pipeline_model.runtime_conf_on_party)
        dsl_version = train_runtime_conf.get("dsl_version", "1")
        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline_model.train_dsl)
        parent_predict_dsl = json_loads(pipeline_model.inference_dsl)
        # An inference dsl given in the request ('dsl' first, then
        # 'predict_dsl') takes precedence over a generated one.
        if config_data.get('dsl') or config_data.get('predict_dsl'):
            inference_dsl = config_data.get('dsl') if config_data.get(
                'dsl') else config_data.get('predict_dsl')
            if not isinstance(inference_dsl, dict):
                inference_dsl = json_loads(inference_dsl)
        else:
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                # default: every component of the train dsl
                cpn_list = list(train_dsl.get('components', {}).keys())
            if int(dsl_version) == 1:
                # convert v1 dsl to v2 dsl
                inference_dsl, warning_msg = parser.convert_dsl_v1_to_v2(
                    parent_predict_dsl)
            else:
                parser = get_dsl_parser_by_version(dsl_version)
                inference_dsl = parser.deploy_component(cpn_list, train_dsl)
        # convert v1 conf to v2 conf
        if int(dsl_version) == 1:
            components = parser.get_components_light_weight(inference_dsl)
            from fate_flow.db.component_registry import ComponentRegistry
            job_providers = parser.get_job_providers(
                dsl=inference_dsl, provider_detail=ComponentRegistry.REGISTRY)
            cpn_role_parameters = dict()
            for cpn in components:
                cpn_name = cpn.get_name()
                role_params = parser.parse_component_role_parameters(
                    component=cpn_name,
                    dsl=inference_dsl,
                    runtime_conf=train_runtime_conf,
                    provider_detail=ComponentRegistry.REGISTRY,
                    provider_name=job_providers[cpn_name]["provider"]["name"],
                    provider_version=job_providers[cpn_name]["provider"]
                    ["version"])
                cpn_role_parameters[cpn_name] = role_params
            train_runtime_conf = parser.convert_conf_v1_to_v2(
                train_runtime_conf, cpn_role_parameters)
        # point the runtime conf and the pipeline at the child model version
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline_model.model_version = child_model_version
        pipeline_model.train_runtime_conf = json_dumps(train_runtime_conf,
                                                       byte=True)
        # save inference dsl into child model file; from here on the v2
        # parser is used whatever the original dsl version was
        parser = get_dsl_parser_by_version(2)
        parser.verify_dsl(inference_dsl, "predict")
        inference_dsl = JobSaver.fill_job_inference_dsl(
            job_id=model_version,
            role=local_role,
            party_id=local_party_id,
            dsl_parser=parser,
            origin_inference_dsl=inference_dsl)
        pipeline_model.inference_dsl = json_dumps(inference_dsl, byte=True)
        # parent info is only written for models newer than 1.5.0
        if compare_version(pipeline_model.fate_version, '1.5.0') == 'gt':
            pipeline_model.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline_model.parent = False
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline_model.runtime_conf_on_party = json_dumps(
                runtime_conf_on_party, byte=True)
        # save model file, then mirror pipeline.pb into the variables tree
        deploy_model.save_pipeline(pipeline_model)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))
        model_info = gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        # a model that carries an inference dsl is recorded as a child
        model_info['parent'] = False if model_info.get(
            'f_inference_dsl') else True
        if compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf',
                                                 {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator',
                                                {}).get('party_id')
        save_model_info(model_info)
        # deploy the requested checkpoint of each component, if any
        for component_name, component in train_dsl.get('components',
                                                       {}).items():
            step_index = components_checkpoint.get(component_name,
                                                   {}).get('step_index')
            step_name = components_checkpoint.get(component_name,
                                                  {}).get('step_name')
            if step_index is not None:
                # step_index wins over step_name when both are given
                step_index = int(step_index)
                step_name = None
            elif step_name is None:
                # no checkpoint requested for this component
                continue
            checkpoint_manager = CheckpointManager(
                role=local_role,
                party_id=local_party_id,
                model_id=model_id,
                model_version=model_version,
                component_name=component_name,
                mkdir=False,
            )
            checkpoint_manager.load_checkpoints_from_disk()
            if checkpoint_manager.latest_checkpoint is not None:
                checkpoint_manager.deploy(
                    child_model_version,
                    component['output']['model'][0] if component.get(
                        'output', {}).get('model') else 'default',
                    step_index,
                    step_name,
                )
    except Exception as e:
        # every failure is logged and reported through the return value
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        msg = f"deploy model of role {local_role} {local_party_id} success"
        if warning_msg:
            msg = msg + f", warning: {warning_msg}"
        return 0, msg
def migration(config_data: dict):
    """Create a migrated copy of a local model for a new set of parties.

    The model directory is copied to the migrated model id / unified version,
    the pipeline and every component model are rewritten for the new roles,
    the result is packaged into an archive and the working copy is removed.

    :param config_data: dict with ``model_id``, ``model_version``,
        ``unify_model_version``, ``local`` (``role``, ``party_id``,
        ``migrate_party_id``), ``role``, ``migrate_role`` and
        ``migrate_initiator``.
    :return: ``(0, message, info)`` on success, where ``info`` holds the
        migrated ``model_id``, ``model_version`` and archive ``path``;
        ``(100, error_message, {})`` when any step raised.
    """
    try:
        party_model_id = model_utils.gen_party_model_id(
            model_id=config_data["model_id"],
            role=config_data["local"]["role"],
            party_id=config_data["local"]["party_id"])
        model = pipelined_model.PipelinedModel(
            model_id=party_model_id,
            model_version=config_data["model_version"])
        if not model.exists():
            raise Exception("Can not found {} {} model local cache".format(
                config_data["model_id"], config_data["model_version"]))

        with DB.connection_context():
            if MLModel.get_or_none(MLModel.f_model_version == config_data["unify_model_version"]):
                raise Exception(
                    "Unify model version {} has been occupied in database. "
                    "Please choose another unify model version and try again.".
                    format(config_data["unify_model_version"]))

        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        migrate_model = pipelined_model.PipelinedModel(
            model_id=model_utils.gen_party_model_id(
                model_id=model_utils.gen_model_id(config_data["migrate_role"]),
                role=config_data["local"]["role"],
                party_id=config_data["local"]["migrate_party_id"]),
            model_version=config_data["unify_model_version"])

        # migrate_model.create_pipelined_model()
        shutil.copytree(src=model.model_path, dst=migrate_model.model_path)

        pipeline = migrate_model.read_component_model('pipeline', 'pipeline')['Pipeline']

        # Rewrite the train runtime conf for the new roles and initiator.
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        train_runtime_conf["role"] = config_data["migrate_role"]
        train_runtime_conf["initiator"] = config_data["migrate_initiator"]

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_id=model_utils.gen_model_id(train_runtime_conf["role"]),
            model_version=migrate_model.model_version)
        # to_dict must be *called*; read the updated parameters once.
        common_parameters = adapter.get_common_parameters().to_dict()

        # update pipeline.pb file
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)
        pipeline.model_id = bytes(common_parameters.get("model_id"), "utf-8")
        pipeline.model_version = bytes(common_parameters.get("model_version"), "utf-8")

        # save updated pipeline.pb file and mirror it into the variables tree
        migrate_model.save_pipeline(pipeline)
        shutil.copyfile(
            os.path.join(migrate_model.model_path, "pipeline.pb"),
            os.path.join(migrate_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        # modify proto
        with open(os.path.join(migrate_model.model_path, 'define', 'define_meta.yaml'), 'r') as fin:
            define_yaml = yaml.safe_load(fin)

        for component_name, model_aliases in define_yaml['model_proto'].items():
            if component_name == 'pipeline':
                continue
            module_name = define_yaml['component_define'].get(
                component_name, {}).get('module_name')
            for model_alias in model_aliases:
                buffer_obj = migrate_model.read_component_model(component_name, model_alias)
                modified_buffer = model_migration(
                    model_contents=buffer_obj,
                    module_name=module_name,
                    old_guest_list=config_data['role']['guest'],
                    new_guest_list=config_data['migrate_role']['guest'],
                    old_host_list=config_data['role']['host'],
                    new_host_list=config_data['migrate_role']['host'],
                    old_arbiter_list=config_data.get('role', {}).get('arbiter', None),
                    new_arbiter_list=config_data.get('migrate_role', {}).get('arbiter', None))
                migrate_model.save_component_model(
                    component_name=component_name,
                    component_module_name=module_name,
                    model_alias=model_alias,
                    model_buffers=modified_buffer)

        archive_path = migrate_model.packaging_model()
        # Only the archive is kept; the working copy is removed.
        shutil.rmtree(os.path.abspath(migrate_model.model_path))

        message = ("Migrating model successfully. "
                   "The configuration of model has been modified automatically. "
                   "New model id is: {}, model version is: {}. "
                   "Model files can be found at '{}'.").format(
                       common_parameters["model_id"],
                       migrate_model.model_version,
                       os.path.abspath(archive_path))
        return (0, message, {"model_id": migrate_model.model_id,
                             "model_version": migrate_model.model_version,
                             "path": os.path.abspath(archive_path)})
    except Exception as e:
        # Failures are reported through the return value, not raised.
        return 100, str(e), {}
def deploy(config_data):
    """Create a deployable child model from an existing parent model.

    The parent model directory is copied to a new model version, the copied
    pipeline is rewritten (model version, train runtime conf, inference DSL
    and, for FATE versions newer than 1.5.0, parent info and
    ``runtime_conf_on_party``), and the resulting model info is persisted
    through ``model_utils.save_model_info``.

    Args:
        config_data: dict read for the keys ``model_id``, ``model_version``,
            ``child_model_version`` and ``local`` (a dict holding ``role``
            and ``party_id``). For DSL versions other than ``'1'`` the
            predict DSL is taken from ``dsl`` or ``predict_dsl`` (a dict or
            a JSON string) when given; otherwise it is generated from
            ``cpn_list`` (popped from ``config_data`` when present) or from
            every component of the train DSL.

    Returns:
        ``(0, message)`` on success, ``(100, message)`` on any failure.
    """
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')

    deploy_model = None
    # Set only after copytree succeeded, i.e. when the child directory was
    # created by this very call and is therefore safe to remove on failure.
    child_model_created = False

    try:
        party_model_id = model_utils.gen_party_model_id(
            model_id=model_id, role=local_role, party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        # check if the model could be executed the deploy process (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path)
        child_model_created = True
        pipeline = deploy_model.read_component_model('pipeline',
                                                     'pipeline')['Pipeline']

        # modify two pipeline files (model version / train_runtime_conf)
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline.model_version = child_model_version
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        dsl_version = train_runtime_conf.get('dsl_version', '1')
        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline.train_dsl)

        if str(dsl_version) == '1':
            # DSL v1 models keep the inference DSL stored with the parent.
            predict_dsl = json_loads(pipeline.inference_dsl)
        elif config_data.get('dsl') or config_data.get('predict_dsl'):
            # Caller supplied the predict DSL explicitly ('dsl' wins).
            predict_dsl = (config_data.get('dsl')
                           or config_data.get('predict_dsl'))
            if not isinstance(predict_dsl, dict):
                predict_dsl = json_loads(predict_dsl)
        else:
            # Generate the predict DSL from the requested components, or
            # from every component of the train DSL when none were given.
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            parser = schedule_utils.get_dsl_parser_by_version(dsl_version)
            predict_dsl = parser.deploy_component(cpn_list, train_dsl)

        # save predict dsl into child model file
        parser.verify_dsl(predict_dsl, "predict")
        inference_dsl = parser.get_predict_dsl(
            role=local_role,
            predict_dsl=predict_dsl,
            setting_conf_prefix=file_utils.
            get_federatedml_setting_conf_directory())
        pipeline.inference_dsl = json_dumps(inference_dsl, byte=True)
        if model_utils.compare_version(pipeline.fate_version,
                                       '1.5.0') == 'gt':
            pipeline.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline.parent = False
            runtime_conf_on_party = json_loads(pipeline.runtime_conf_on_party)
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party,
                                                        byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        model_info = model_utils.gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['work_mode'] = adapter.get_job_work_mode()
        model_info['parent'] = not model_info.get('f_inference_dsl')
        if model_utils.compare_version(model_info['f_fate_version'],
                                       '1.5.0') == 'eq':
            train_conf_info = model_info.get('f_train_runtime_conf', {})
            model_info['roles'] = train_conf_info.get('role', {})
            model_info['initiator_role'] = train_conf_info.get(
                'initiator', {}).get('role')
            model_info['initiator_party_id'] = train_conf_info.get(
                'initiator', {}).get('party_id')

        model_utils.save_model_info(model_info)

    except Exception as e:
        stat_logger.exception(e)
        if child_model_created:
            # Do not leave a half-rewritten child model behind when the
            # deploy is reported as failed.
            shutil.rmtree(deploy_model.model_path, ignore_errors=True)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        return 0, f"deploy model of role {local_role} {local_party_id} success"
def operate_model(model_operation):
    """Handle a store / restore / export / import request for a model.

    The request payload (JSON body or form fields) must contain
    ``model_id``, ``model_version``, ``role`` and ``party_id``; ``model_id``
    is replaced in place by the party model id before further processing.

    * IMPORT: saves the uploaded ``file`` under ``TEMP_DIRECTORY``, unpacks
      it into the model directory, checks that the requesting party id
      appears in the model's roles and registers the model in the database
      if no record exists yet.
    * EXPORT: packages the model and returns the archive as an attachment,
      or an error response with code 210.
    * STORE / RESTORE: submits a job through ``DAGScheduler.submit``.

    Args:
        model_operation: one of the ``ModelOperation`` members listed above.

    Returns:
        The response object built by ``get_json_result``, ``send_file`` or
        ``error_response``.

    Raises:
        Exception: for an unsupported operation, or re-raised from a failed
            import after the failure has been recorded.
    """
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [
            ModelOperation.STORE, ModelOperation.RESTORE,
            ModelOperation.EXPORT, ModelOperation.IMPORT
    ]:
        raise Exception(
            'Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(
        model_id=request_config["model_id"],
        role=request_config["role"],
        party_id=request_config["party_id"])

    if model_operation == ModelOperation.IMPORT:
        try:
            upload = request.files.get('file')
            if upload is None:
                raise Exception(
                    "No model file was uploaded in the 'file' field.")
            # The filename is supplied by the client: keep only its last
            # path component so it cannot escape TEMP_DIRECTORY.
            file_name = os.path.basename(upload.filename or '')
            if file_name in ('', '.', '..'):
                raise Exception("Invalid uploaded file name: {}".format(
                    upload.filename))
            file_path = os.path.join(TEMP_DIRECTORY, file_name)
            try:
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                upload.save(file_path)
            except Exception:
                # file_path is a regular file; shutil.rmtree would raise on
                # it and hide the original error, so remove it as a file.
                try:
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                except OSError as cleanup_error:
                    stat_logger.exception(cleanup_error)
                raise
            request_config['file'] = file_path

            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            model.unpack_model(file_path)

            pipeline = model.read_component_model('pipeline',
                                                  'pipeline')['Pipeline']
            train_runtime_conf = json_loads(pipeline.train_runtime_conf)

            # Accept the party id both in its stored form and as a string,
            # since the request may carry either representation.
            permitted_party_id = []
            for party_ids in train_runtime_conf.get('role', {}).values():
                for party_id in party_ids:
                    permitted_party_id.extend([party_id, str(party_id)])
            if request_config["party_id"] not in permitted_party_id:
                shutil.rmtree(model.model_path)
                raise Exception(
                    "party id {} is not in model roles, please check if the party id is valid."
                    .format(request_config["party_id"]))

            try:
                with DB.connection_context():
                    job_parameters = train_runtime_conf["job_parameters"]
                    model_record = MLModel.get_or_none(
                        MLModel.f_job_id == job_parameters["model_version"],
                        MLModel.f_role == request_config["role"])
                    if not model_record:
                        MLModel.create(
                            f_role=request_config["role"],
                            f_party_id=request_config["party_id"],
                            f_roles=train_runtime_conf["role"],
                            f_job_id=job_parameters["model_version"],
                            f_model_id=job_parameters["model_id"],
                            f_model_version=job_parameters["model_version"],
                            f_initiator_role=train_runtime_conf["initiator"]
                            ["role"],
                            f_initiator_party_id=train_runtime_conf[
                                "initiator"]["party_id"],
                            f_runtime_conf=train_runtime_conf,
                            f_work_mode=job_parameters["work_mode"],
                            f_dsl=json_loads(pipeline.train_dsl),
                            f_imported=1,
                            f_job_status='complete')
                    else:
                        stat_logger.info(
                            f'job id: {train_runtime_conf["job_parameters"]["model_version"]}, '
                            f'role: {request_config["role"]} model info already existed in database.'
                        )
            except peewee.IntegrityError as e:
                # A concurrent import may have created the record already;
                # the import itself is still treated as successful.
                stat_logger.exception(e)
            operation_record(request_config, "import", "success")
            return get_json_result()
        except Exception:
            operation_record(request_config, "import", "failed")
            raise

    if model_operation == ModelOperation.EXPORT:
        try:
            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            if model.exists():
                archive_file_path = model.packaging_model()
                operation_record(request_config, "export", "success")
                return send_file(
                    archive_file_path,
                    attachment_filename=os.path.basename(archive_file_path),
                    as_attachment=True)
            operation_record(request_config, "export", "failed")
            return error_response(
                response_code=210,
                retmsg="Model {} {} is not exist.".format(
                    request_config.get("model_id"),
                    request_config.get("model_version")))
        except Exception as e:
            operation_record(request_config, "export", "failed")
            stat_logger.exception(e)
            return error_response(response_code=210, retmsg=str(e))

    # STORE / RESTORE are executed as a scheduled job.
    data = {}
    job_dsl, job_runtime_conf = gen_model_operation_job_config(
        request_config, model_operation)
    job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = DAGScheduler.submit(
        {
            'job_dsl': job_dsl,
            'job_runtime_conf': job_runtime_conf
        },
        job_id=job_id)
    data.update({
        'job_dsl_path': job_dsl_path,
        'job_runtime_conf_path': job_runtime_conf_path,
        'board_url': board_url,
        'logs_directory': logs_directory
    })
    operation_record(data=job_runtime_conf,
                     oper_type=model_operation,
                     oper_status='')
    return get_json_result(job_id=job_id, data=data)