def save_machine_learning_model_info(self):
    """Persist model metadata to the MLModel table, skipping duplicates."""
    try:
        # skip if a record for this model version / role / party already exists
        record = MLModel.get_or_none(MLModel.f_model_version == self.job_id,
                                     MLModel.f_role == self.role,
                                     MLModel.f_model_id == self.model_id,
                                     MLModel.f_party_id == self.party_id)
        if not record:
            job = Job.get_or_none(Job.f_job_id == self.job_id)
            pipeline = self.pipelined_model.read_pipeline_model()
            if job:
                job_data = job.to_dict()
                model_info = {
                    'job_id': job_data.get('f_job_id'),
                    'role': self.role,
                    'party_id': self.party_id,
                    'roles': job_data.get('f_roles'),
                    'model_id': self.model_id,
                    'model_version': self.model_version,
                    'initiator_role': job_data.get('f_initiator_role'),
                    'initiator_party_id': job_data.get('f_initiator_party_id'),
                    'runtime_conf': job_data.get('f_runtime_conf'),
                    'work_mode': job_data.get('f_work_mode'),
                    'train_dsl': job_data.get('f_dsl'),
                    'train_runtime_conf': job_data.get('f_train_runtime_conf'),
                    'size': self.get_model_size(),
                    'job_status': job_data.get('f_status'),
                    'parent': pipeline.parent,
                    'fate_version': pipeline.fate_version,
                    'runtime_conf_on_party': json_loads(pipeline.runtime_conf_on_party),
                    'parent_info': json_loads(pipeline.parent_info),
                    'inference_dsl': json_loads(pipeline.inference_dsl),
                }
                model_utils.save_model_info(model_info)
                schedule_logger(self.job_id).info(
                    'save {} model info done. model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
            else:
                schedule_logger(self.job_id).info(
                    'save {} model info failed, no job found in db. '
                    'model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
        else:
            schedule_logger(self.job_id).info(
                'model {} info already exists in database.'.format(self.job_id))
    except Exception as e:
        schedule_logger(self.job_id).exception(e)
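
# --- Illustrative example, not part of the source ---------------------------
# A minimal sketch of the "check-then-insert" idempotency pattern used above,
# built on peewee's get_or_none(). DemoModel, its fields, and the in-memory
# SQLite database are hypothetical stand-ins for FATE Flow's MLModel table.
import peewee

db = peewee.SqliteDatabase(':memory:')

class DemoModel(peewee.Model):
    f_model_id = peewee.CharField()
    f_model_version = peewee.CharField()
    f_role = peewee.CharField()
    f_party_id = peewee.CharField()

    class Meta:
        database = db

db.create_tables([DemoModel])

def save_once(model_id, model_version, role, party_id):
    # only insert when no matching row exists, mirroring the guard above
    record = DemoModel.get_or_none(DemoModel.f_model_id == model_id,
                                   DemoModel.f_model_version == model_version,
                                   DemoModel.f_role == role,
                                   DemoModel.f_party_id == party_id)
    if record is None:
        DemoModel.create(f_model_id=model_id, f_model_version=model_version,
                         f_role=role, f_party_id=party_id)

save_once('model', '202101281234567890', 'guest', '9999')
save_once('model', '202101281234567890', 'guest', '9999')  # no duplicate row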
def operate_model(model_operation):
    """Handle model store/restore/export/import requests."""
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Operation is not supported: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    # clean up the partially written upload before re-raising
                    if os.path.exists(file_path):
                        os.remove(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)
                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                # the importing party must appear among the model's saved roles
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if "
                                    "the party id is valid.".format(request_config["party_id"]))
                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(
                            MLModel.f_job_id == job_parameters.get("model_version"),
                            MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        # party model id format is "role#party_id#model_id"
                        model_info['role'] = request_config["model_id"].split('#')[0]
                        model_info['party_id'] = request_config["model_id"].split('#')[1]
                        # backfill fields that models exported before 1.5.1 lack
                        if model_utils.compare_version(model_info['f_fate_version'], '1.5.1') == 'lt':
                            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
                            model_info['initiator_role'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                            model_info['initiator_party_id'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                            model_info['work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = False if model_info.get('f_inference_dsl') else True
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(
                            f'job id: {job_parameters.get("model_version")}, '
                            f'role: {request_config["role"]} model info already exists in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        # STORE / RESTORE run as a scheduled job
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        submit_result = DAGScheduler.submit({'job_dsl': job_dsl,
                                             'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)
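
# --- Illustrative example, not part of the source ---------------------------
# A hedged sketch of how a client might exercise the import branch above using
# the `requests` library. The host, port, and route are assumptions; check
# your FATE Flow deployment for the endpoint this handler is registered under.
import requests

def import_model_example():
    url = "http://127.0.0.1:9380/v1/model/import"  # hypothetical address/route
    data = {
        "model_id": "arbiter-10000#guest-9999#host-10000#model",  # placeholder
        "model_version": "202101281234567890",                    # placeholder
        "role": "guest",
        "party_id": "9999",
    }
    # archive previously produced by the export branch of operate_model()
    with open("model_export.zip", "rb") as f:
        response = requests.post(url, data=data, files={"file": f})
    print(response.json())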
def deploy(config_data):
    """Generate and register a child (inference) model from a trained parent model."""
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    try:
        party_model_id = model_utils.gen_party_model_id(model_id=model_id,
                                                        role=local_role,
                                                        party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id, model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Cannot find pipeline file in model.")

        # check whether the model can go through the deploy process (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from the parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id, model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path)
        pipeline = deploy_model.read_component_model('pipeline', 'pipeline')['Pipeline']

        # modify the two pipeline files (model version / train_runtime_conf)
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(model_version=deploy_model.model_version)
        pipeline.model_version = child_model_version
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        parser = get_dsl_parser_by_version(train_runtime_conf.get('dsl_version', '1'))
        train_dsl = json_loads(pipeline.train_dsl)
        parent_predict_dsl = json_loads(pipeline.inference_dsl)

        if str(train_runtime_conf.get('dsl_version', '1')) == '1':
            predict_dsl = json_loads(pipeline.inference_dsl)
        else:
            if config_data.get('dsl') or config_data.get('predict_dsl'):
                # a user-supplied predict DSL takes precedence
                predict_dsl = config_data.get('dsl') if config_data.get('dsl') else config_data.get('predict_dsl')
                if not isinstance(predict_dsl, dict):
                    predict_dsl = json_loads(predict_dsl)
            else:
                # otherwise derive it from the training DSL and the component list
                if config_data.get('cpn_list', None):
                    cpn_list = config_data.pop('cpn_list')
                else:
                    cpn_list = list(train_dsl.get('components', {}).keys())
                parser_version = train_runtime_conf.get('dsl_version', '1')
                if str(parser_version) == '1':
                    predict_dsl = parent_predict_dsl
                else:
                    parser = schedule_utils.get_dsl_parser_by_version(parser_version)
                    predict_dsl = parser.deploy_component(cpn_list, train_dsl)

        # save predict dsl into the child model file
        parser.verify_dsl(predict_dsl, "predict")
        inference_dsl = parser.get_predict_dsl(
            role=local_role,
            predict_dsl=predict_dsl,
            setting_conf_prefix=file_utils.get_federatedml_setting_conf_directory())
        pipeline.inference_dsl = json_dumps(inference_dsl, byte=True)

        if model_utils.compare_version(pipeline.fate_version, '1.5.0') == 'gt':
            pipeline.parent_info = json_dumps({'parent_model_id': model_id,
                                               'parent_model_version': model_version}, byte=True)
            pipeline.parent = False
            runtime_conf_on_party = json_loads(pipeline.runtime_conf_on_party)
            runtime_conf_on_party['job_parameters']['model_version'] = child_model_version
            pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party, byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline)
        shutil.copyfile(os.path.join(deploy_model.model_path, "pipeline.pb"),
                        os.path.join(deploy_model.model_path, "variables", "data",
                                     "pipeline", "pipeline", "Pipeline"))

        model_info = model_utils.gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['work_mode'] = adapter.get_job_work_mode()
        model_info['parent'] = False if model_info.get('f_inference_dsl') else True
        if model_utils.compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
        model_utils.save_model_info(model_info)
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        return 0, f"deploy model of role {local_role} {local_party_id} success"
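
# --- Illustrative example, not part of the source ---------------------------
# A minimal sketch of the config_data expected by deploy() above. All values
# are placeholders; 'cpn_list' and 'predict_dsl' are optional, alternative
# ways to control the derived predict DSL.
example_config_data = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",  # placeholder
    "model_version": "202101281234567890",        # parent model version
    "child_model_version": "202101281234567891",  # version for the deployed child
    "local": {"role": "guest", "party_id": 9999},
    # optional: deploy only part of the training pipeline
    # "cpn_list": ["reader_0", "data_transform_0", "hetero_lr_0"],
    # optional: supply a hand-written predict DSL instead
    # "predict_dsl": {"components": {}},
}
# retcode, retmsg = deploy(example_config_data)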
def deploy(config_data):
    """Generate and register a child (inference) model, with v1-to-v2 DSL/conf
    conversion and component checkpoint deployment."""
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    components_checkpoint = config_data.get('components_checkpoint', {})
    warning_msg = ""
    try:
        party_model_id = gen_party_model_id(model_id=model_id,
                                            role=local_role,
                                            party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id, model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Cannot find pipeline file in model.")

        # check whether the model can go through the deploy process (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from the parent model and generate a child model,
        # skipping the top-level checkpoint directory
        deploy_model = PipelinedModel(model_id=party_model_id, model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path,
                        ignore=lambda src, names: {'checkpoint'} if src == model.model_path else {})
        pipeline_model = deploy_model.read_pipeline_model()

        train_runtime_conf = json_loads(pipeline_model.train_runtime_conf)
        runtime_conf_on_party = json_loads(pipeline_model.runtime_conf_on_party)
        dsl_version = train_runtime_conf.get("dsl_version", "1")

        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline_model.train_dsl)
        parent_predict_dsl = json_loads(pipeline_model.inference_dsl)

        if config_data.get('dsl') or config_data.get('predict_dsl'):
            # a user-supplied predict DSL takes precedence
            inference_dsl = config_data.get('dsl') if config_data.get('dsl') else config_data.get('predict_dsl')
            if not isinstance(inference_dsl, dict):
                inference_dsl = json_loads(inference_dsl)
        else:
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            if int(dsl_version) == 1:
                # convert v1 dsl to v2 dsl
                inference_dsl, warning_msg = parser.convert_dsl_v1_to_v2(parent_predict_dsl)
            else:
                parser = get_dsl_parser_by_version(dsl_version)
                inference_dsl = parser.deploy_component(cpn_list, train_dsl)

        # convert v1 conf to v2 conf
        if int(dsl_version) == 1:
            components = parser.get_components_light_weight(inference_dsl)

            from fate_flow.db.component_registry import ComponentRegistry
            job_providers = parser.get_job_providers(dsl=inference_dsl,
                                                     provider_detail=ComponentRegistry.REGISTRY)
            cpn_role_parameters = dict()
            for cpn in components:
                cpn_name = cpn.get_name()
                role_params = parser.parse_component_role_parameters(
                    component=cpn_name,
                    dsl=inference_dsl,
                    runtime_conf=train_runtime_conf,
                    provider_detail=ComponentRegistry.REGISTRY,
                    provider_name=job_providers[cpn_name]["provider"]["name"],
                    provider_version=job_providers[cpn_name]["provider"]["version"])
                cpn_role_parameters[cpn_name] = role_params
            train_runtime_conf = parser.convert_conf_v1_to_v2(train_runtime_conf, cpn_role_parameters)

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(model_version=deploy_model.model_version)
        pipeline_model.model_version = child_model_version
        pipeline_model.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        # save inference dsl into the child model file
        parser = get_dsl_parser_by_version(2)
        parser.verify_dsl(inference_dsl, "predict")
        inference_dsl = JobSaver.fill_job_inference_dsl(job_id=model_version,
                                                        role=local_role,
                                                        party_id=local_party_id,
                                                        dsl_parser=parser,
                                                        origin_inference_dsl=inference_dsl)
        pipeline_model.inference_dsl = json_dumps(inference_dsl, byte=True)

        if compare_version(pipeline_model.fate_version, '1.5.0') == 'gt':
            pipeline_model.parent_info = json_dumps({'parent_model_id': model_id,
                                                     'parent_model_version': model_version}, byte=True)
            pipeline_model.parent = False
            runtime_conf_on_party['job_parameters']['model_version'] = child_model_version
            pipeline_model.runtime_conf_on_party = json_dumps(runtime_conf_on_party, byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline_model)
        shutil.copyfile(os.path.join(deploy_model.model_path, "pipeline.pb"),
                        os.path.join(deploy_model.model_path, "variables", "data",
                                     "pipeline", "pipeline", "Pipeline"))

        model_info = gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['parent'] = False if model_info.get('f_inference_dsl') else True
        if compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
        save_model_info(model_info)

        # deploy the selected checkpoint of each component, if one was requested
        for component_name, component in train_dsl.get('components', {}).items():
            step_index = components_checkpoint.get(component_name, {}).get('step_index')
            step_name = components_checkpoint.get(component_name, {}).get('step_name')
            if step_index is not None:
                # step_index takes precedence over step_name
                step_index = int(step_index)
                step_name = None
            elif step_name is None:
                continue

            checkpoint_manager = CheckpointManager(
                role=local_role, party_id=local_party_id,
                model_id=model_id, model_version=model_version,
                component_name=component_name, mkdir=False,
            )
            checkpoint_manager.load_checkpoints_from_disk()
            if checkpoint_manager.latest_checkpoint is not None:
                checkpoint_manager.deploy(
                    child_model_version,
                    component['output']['model'][0] if component.get(
                        'output', {}).get('model') else 'default',
                    step_index,
                    step_name,
                )
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        msg = f"deploy model of role {local_role} {local_party_id} success"
        if warning_msg:
            msg += f", warning: {warning_msg}"
        return 0, msg
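
# --- Illustrative example, not part of the source ---------------------------
# A hedged sketch of the components_checkpoint mapping consumed by the loop
# above: per component, either a step_index (which takes precedence and resets
# step_name) or a step_name selects the checkpoint to deploy. Component names
# and values are placeholders.
example_components_checkpoint = {
    "hetero_lr_0": {"step_index": 5},                   # select by step index
    "hetero_secureboost_0": {"step_name": "round_10"},  # or select by name
    # components absent from this mapping (with neither field set)
    # are skipped by the loop above
}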