def save_model_info(model_info):
    model = MLModel()
    model.f_create_time = current_timestamp()
    for k, v in model_info.items():
        attr_name = 'f_%s' % k
        if hasattr(MLModel, attr_name):
            setattr(model, attr_name, v)
        elif hasattr(MLModel, k):
            setattr(model, k, v)
    try:
        rows = model.save(force_insert=True)
        if rows != 1:
            raise Exception("Save to database failed")
    except peewee.IntegrityError as e:
        # MySQL error 1062 is a duplicate-key error: the model info already exists,
        # so only log a warning instead of failing.
        if e.args[0] != 1062:
            raise Exception("Create {} failed:\n{}".format(MLModel, e))
        sql_logger(job_id=model_info.get("job_id", "fate_flow")).warning(e)
        return
    except Exception as e:
        raise Exception("Create {} failed:\n{}".format(MLModel, e))
    RuntimeConfig.SERVICE_DB.register_model(
        gen_party_model_id(role=model.f_role, party_id=model.f_party_id,
                           model_id=model.f_model_id),
        model.f_model_version)
    return model
def save_model_info(model_info):
    model = MLModel()
    model.f_create_time = current_timestamp()
    for k, v in model_info.items():
        attr_name = 'f_%s' % k
        if hasattr(MLModel, attr_name):
            setattr(model, attr_name, v)
        elif hasattr(MLModel, k):
            setattr(model, k, v)
    try:
        rows = model.save(force_insert=True)
        if rows != 1:
            raise Exception("Create {} failed".format(MLModel))
        if RuntimeConfig.zk_client is not None:
            ServiceUtils.register(RuntimeConfig.zk_client,
                                  gen_party_model_id(role=model.f_role,
                                                     party_id=model.f_party_id,
                                                     model_id=model.f_model_id),
                                  model.f_model_version)
        return model
    except peewee.IntegrityError as e:
        if e.args[0] == 1062:
            sql_logger(job_id=model_info.get("job_id", "fate_flow")).warning(e)
        else:
            raise Exception("Create {} failed:\n{}".format(MLModel, e))
    except Exception as e:
        raise Exception("Create {} failed:\n{}".format(MLModel, e))
def test_register_models(self):
    try:
        os.remove(DB.database)
    except FileNotFoundError:
        pass
    MLModel.create_table()
    for x in range(1, 101):
        job_id = str(time.time())
        model = MLModel(f_role='host', f_party_id='100', f_job_id=job_id,
                        f_model_id=f'foobar#{x}', f_model_version=job_id,
                        f_initiator_role='host', f_work_mode=0)
        model.save(force_insert=True)
    self.assertEqual(db_services.models_group_by_party_model_id_and_model_version().count(), 100)

    with patch.object(self.service_db, '_insert') as insert:
        self.service_db.register_models()
    self.assertEqual(insert.call_count, 100)

    with patch.object(self.service_db, '_delete') as delete:
        self.service_db.unregister_models()
    self.assertEqual(delete.call_count, 100)

    os.remove(DB.database)
def do_load_model():
    request_data = request.json
    adapter_servings_config(request_data)
    retcode, retmsg = publish_model.load_model(config_data=request_data)
    try:
        if not retcode:
            with DB.connection_context():
                model = MLModel.get_or_none(
                    MLModel.f_role == request_data.get("local").get("role"),
                    MLModel.f_party_id == request_data.get("local").get("party_id"),
                    MLModel.f_model_id == request_data.get("job_parameters").get("model_id"),
                    MLModel.f_model_version == request_data.get("job_parameters").get("model_version"))
                if model:
                    count = model.f_loaded_times
                    model.f_loaded_times = count + 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    try:
        party_model_id = gen_party_model_id(role=request_data.get("local").get("role"),
                                            party_id=request_data.get("local").get("party_id"),
                                            model_id=request_data.get("job_parameters").get("model_id"))
        src_model_path = os.path.join(file_utils.get_project_base_directory(), 'model_local_cache',
                                      party_model_id, request_data.get("job_parameters").get("model_version"))
        dst_model_path = os.path.join(file_utils.get_project_base_directory(), 'loaded_model_backup',
                                      party_model_id, request_data.get("job_parameters").get("model_version"))
        if not os.path.exists(dst_model_path):
            shutil.copytree(src=src_model_path, dst=dst_model_path)
    except Exception as copy_err:
        stat_logger.exception(copy_err)

    operation_record(request_data, "load", "success" if not retcode else "failed")
    return get_json_result(retcode=retcode, retmsg=retmsg)
def bind_model_service():
    request_config = request.json
    if request_config.get('job_id', None):
        with DB.connection_context():
            model = MLModel.get_or_none(
                MLModel.f_job_id == request_config.get("job_id"),
                MLModel.f_role == 'guest')
        if model:
            model_info = model.to_json()
            request_config['initiator'] = {}
            request_config['initiator']['party_id'] = str(model_info.get('f_initiator_party_id'))
            request_config['initiator']['role'] = model_info.get('f_initiator_role')
            request_config['job_parameters'] = model_info.get('f_runtime_conf').get('job_parameters')
            request_config['role'] = model_info.get('f_runtime_conf').get('role')
            for key, value in request_config['role'].items():
                for i, v in enumerate(value):
                    value[i] = str(v)
            request_config.pop('job_id')
        else:
            return get_json_result(retcode=101,
                                   retmsg="Model {} cannot be found in database. "
                                          "Please check if the model version is valid.".format(
                                              request_config.get('job_id')))
    if not request_config.get('servings'):
        # get all servings of my party
        adapter_servings_config(request_config)
    service_id = request_config.get('service_id')
    if not service_id:
        return get_json_result(retcode=101, retmsg='no service id')
    check_config(request_config, ['initiator', 'role', 'job_parameters'])
    bind_status, retmsg = publish_model.bind_model_service(config_data=request_config)
    operation_record(request_config, "bind", "success" if not bind_status else "failed")
    return get_json_result(retcode=bind_status,
                           retmsg='service id is {}'.format(service_id) if not retmsg else retmsg)
def tag_model(operation):
    if operation not in ['retrieve', 'create', 'remove']:
        return get_json_result(100, "'{}' is not currently supported.".format(operation))
    request_data = request.json
    model = MLModel.get_or_none(MLModel.f_model_version == request_data.get("job_id"))
    if not model:
        raise Exception("Cannot find model by job id: '{}'.".format(request_data.get("job_id")))

    if operation == 'retrieve':
        res = {'tags': []}
        tags = (Tag.select()
                .join(ModelTag, on=ModelTag.f_t_id == Tag.f_id)
                .where(ModelTag.f_m_id == model.f_model_version))
        for tag in tags:
            res['tags'].append({'name': tag.f_name, 'description': tag.f_desc})
        res['count'] = tags.count()
        return get_json_result(data=res)
    elif operation == 'remove':
        tag = Tag.get_or_none(Tag.f_name == request_data.get('tag_name'))
        if not tag:
            raise Exception("Cannot find tag '{}'.".format(request_data.get('tag_name')))
        tags = (Tag.select()
                .join(ModelTag, on=ModelTag.f_t_id == Tag.f_id)
                .where(ModelTag.f_m_id == model.f_model_version))
        if tag.f_name not in [t.f_name for t in tags]:
            raise Exception("Model {} {} does not have tag '{}'.".format(
                model.f_model_id, model.f_model_version, tag.f_name))
        delete_query = ModelTag.delete().where(ModelTag.f_m_id == model.f_model_version,
                                               ModelTag.f_t_id == tag.f_id)
        delete_query.execute()
        return get_json_result(retmsg="'{}' tag has been removed from tag list of model {} {}.".format(
            request_data.get('tag_name'), model.f_model_id, model.f_model_version))
    else:
        if not str(request_data.get('tag_name')):
            raise Exception("Tag name should not be an empty string.")
        tag = Tag.get_or_none(Tag.f_name == request_data.get('tag_name'))
        if not tag:
            tag = Tag()
            tag.f_name = request_data.get('tag_name')
            tag.save(force_insert=True)
        else:
            tags = (Tag.select()
                    .join(ModelTag, on=ModelTag.f_t_id == Tag.f_id)
                    .where(ModelTag.f_m_id == model.f_model_version))
            if tag.f_name in [t.f_name for t in tags]:
                raise Exception("Model {} {} has already been tagged as '{}'.".format(
                    model.f_model_id, model.f_model_version, tag.f_name))
        ModelTag.create(f_t_id=tag.f_id, f_m_id=model.f_model_version)
        return get_json_result(retmsg="Added tag '{}' for model with job id {} successfully.".format(
            request_data.get('tag_name'), request_data.get('job_id')))
def do_load_model():
    request_data = request.json
    request_data['servings'] = RuntimeConfig.SERVICE_DB.get_urls('servings')

    role = request_data['local']['role']
    party_id = request_data['local']['party_id']
    model_id = request_data['job_parameters']['model_id']
    model_version = request_data['job_parameters']['model_version']
    party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)

    if get_base_config('enable_model_store', False):
        pipeline_model = pipelined_model.PipelinedModel(party_model_id, model_version)
        component_parameters = {
            'model_id': party_model_id,
            'model_version': model_version,
            'store_address': ServiceRegistry.MODEL_STORE_ADDRESS,
        }
        model_storage = get_model_storage(component_parameters)
        # keep the local cache and the model store in sync, from whichever side has the model
        if pipeline_model.exists() and not model_storage.exists(**component_parameters):
            stat_logger.info(f'Uploading {pipeline_model.model_path} to model storage.')
            model_storage.store(**component_parameters)
        elif not pipeline_model.exists() and model_storage.exists(**component_parameters):
            stat_logger.info(f'Downloading {pipeline_model.model_path} from model storage.')
            model_storage.restore(**component_parameters)

    if not model_utils.check_if_deployed(role, party_id, model_id, model_version):
        return get_json_result(retcode=100,
                               retmsg="Only deployed models could be used to execute process of loading. "
                                      "Please deploy model before loading.")

    retcode, retmsg = publish_model.load_model(request_data)
    try:
        if not retcode:
            with DB.connection_context():
                model = MLModel.get_or_none(
                    MLModel.f_role == request_data["local"]["role"],
                    MLModel.f_party_id == request_data["local"]["party_id"],
                    MLModel.f_model_id == request_data["job_parameters"]["model_id"],
                    MLModel.f_model_version == request_data["job_parameters"]["model_version"])
                if model:
                    model.f_loaded_times += 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    operation_record(request_data, "load", "success" if not retcode else "failed")
    return get_json_result(retcode=retcode, retmsg=retmsg)
def get_model_configuration(job_id, role, party_id):
    models = MLModel.select(MLModel.f_dsl, MLModel.f_runtime_conf,
                            MLModel.f_train_runtime_conf).where(MLModel.f_job_id == job_id,
                                                                MLModel.f_role == role,
                                                                MLModel.f_party_id == party_id)
    if models:
        model = models[0]
        return model.f_dsl, model.f_runtime_conf, model.f_train_runtime_conf
    else:
        return {}, {}, {}
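# Hypothetical usage sketch (not part of the source): fetching the stored DSL and
# configurations for one party of a finished job. The job id, role and party id
# values below are made up for illustration.
dsl, runtime_conf, train_runtime_conf = get_model_configuration(
    job_id='202101011200000000', role='guest', party_id=9999)
if not dsl:
    print('no model configuration found for this job')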
def save_machine_learning_model_info(self):
    try:
        record = MLModel.get_or_none(MLModel.f_model_version == self.job_id)
        if not record:
            job = Job.get_or_none(Job.f_job_id == self.job_id)
            if job:
                job_data = job.to_json()
                MLModel.create(
                    f_role=self.role,
                    f_party_id=self.party_id,
                    f_roles=job_data.get("f_roles"),
                    f_model_id=self.model_id,
                    f_model_version=self.model_version,
                    f_job_id=job_data.get("f_job_id"),
                    f_create_time=current_timestamp(),
                    f_initiator_role=job_data.get('f_initiator_role'),
                    f_initiator_party_id=job_data.get('f_initiator_party_id'),
                    f_runtime_conf=job_data.get('f_runtime_conf'),
                    f_work_mode=job_data.get('f_work_mode'),
                    f_dsl=job_data.get('f_dsl'),
                    f_train_runtime_conf=job_data.get('f_train_runtime_conf'),
                    f_size=self.get_model_size(),
                    f_job_status=job_data.get('f_status'))
                schedule_logger(self.job_id).info(
                    'save {} model info done. model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
            else:
                schedule_logger(self.job_id).info(
                    'save {} model info failed, no job found in db. '
                    'model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
        else:
            schedule_logger(self.job_id).info(
                'model {} info already exists in database.'.format(self.job_id))
    except Exception as e:
        schedule_logger(self.job_id).exception(e)
def save_model_info(model_info):
    model = MLModel()
    model.f_create_time = current_timestamp()
    for k, v in model_info.items():
        attr_name = 'f_%s' % k
        if hasattr(MLModel, attr_name):
            setattr(model, attr_name, v)
        elif hasattr(MLModel, k):
            setattr(model, k, v)
    try:
        rows = model.save(force_insert=True)
        if rows != 1:
            raise Exception("Create {} failed".format(MLModel))
        return model
    except peewee.IntegrityError as e:
        if e.args[0] == 1062:
            sql_logger(job_id=model_info.get("job_id", "fate_flow")).warning(e)
        else:
            raise Exception("Create {} failed:\n{}".format(MLModel, e))
    except Exception as e:
        raise Exception("Create {} failed:\n{}".format(MLModel, e))
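# Hypothetical usage sketch (not part of the source): persisting model metadata
# after a training job. Keys without the 'f_' prefix are mapped onto the matching
# 'f_*' columns of MLModel by save_model_info; all values here are illustrative.
saved = save_model_info({
    'job_id': '202101011200000000',
    'role': 'guest',
    'party_id': '9999',
    'model_id': 'guest-9999#host-10000#model',
    'model_version': '202101011200000000',
})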
def models_group_by_party_model_id_and_model_version():
    args = [
        MLModel.f_role,
        MLModel.f_party_id,
        MLModel.f_model_id,
        MLModel.f_model_version,
    ]
    models = MLModel.select(*args).group_by(*args)
    for model in models:
        model.f_party_model_id = gen_party_model_id(role=model.f_role,
                                                    party_id=model.f_party_id,
                                                    model_id=model.f_model_id)
    return models
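# Hypothetical usage sketch (not part of the source): iterating over the grouped
# query, e.g. to re-register every known model with a service registry, much like
# the register_models test above exercises.
for m in models_group_by_party_model_id_and_model_version():
    print(m.f_party_model_id, m.f_model_version)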
def query_model_info_from_db(model_version, role=None, party_id=None,
                             model_id=None, query_filters=None, **kwargs):
    conditions = []
    filters = []
    arguments = locals()
    cond_attrs = [attr for attr in ['model_version', 'model_id', 'role', 'party_id']
                  if arguments[attr]]
    for f_n in cond_attrs:
        conditions.append(operator.attrgetter('f_%s' % f_n)(MLModel) == arguments[f_n])
    for f_n in kwargs:
        if hasattr(MLModel, 'f_%s' % f_n):
            # compare the column against the supplied value, not the bare column
            conditions.append(operator.attrgetter('f_%s' % f_n)(MLModel) == kwargs[f_n])
    if query_filters and isinstance(query_filters, list):
        for attr in query_filters:
            attr_name = 'f_%s' % attr
            if hasattr(MLModel, attr_name):
                filters.append(operator.attrgetter(attr_name)(MLModel))
    if filters:
        models = MLModel.select(*filters).where(*conditions)
    else:
        models = MLModel.select().where(*conditions)
    if models:
        return 0, 'Query model info from db success.', [model.to_json() for model in models]
    else:
        return 100, 'Query model info failed, cannot find model from db.', []
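# Hypothetical usage sketch (not part of the source): looking up model metadata,
# selecting only a few columns via query_filters; the values are illustrative.
retcode, retmsg, data = query_model_info_from_db(
    model_version='202101011200000000', role='guest',
    query_filters=['model_id', 'model_version', 'size'])
if retcode == 0:
    for info in data:
        print(info)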
def save_machine_learning_model_info(self):
    try:
        record = MLModel.get_or_none(MLModel.f_model_version == self.job_id,
                                     MLModel.f_role == self.role,
                                     MLModel.f_model_id == self.model_id,
                                     MLModel.f_party_id == self.party_id)
        if not record:
            job = Job.get_or_none(Job.f_job_id == self.job_id)
            pipeline = self.pipelined_model.read_pipeline_model()
            if job:
                job_data = job.to_dict()
                model_info = {
                    'job_id': job_data.get("f_job_id"),
                    'role': self.role,
                    'party_id': self.party_id,
                    'roles': job_data.get("f_roles"),
                    'model_id': self.model_id,
                    'model_version': self.model_version,
                    'initiator_role': job_data.get('f_initiator_role'),
                    'initiator_party_id': job_data.get('f_initiator_party_id'),
                    'runtime_conf': job_data.get('f_runtime_conf'),
                    'work_mode': job_data.get('f_work_mode'),
                    'train_dsl': job_data.get('f_dsl'),
                    'train_runtime_conf': job_data.get('f_train_runtime_conf'),
                    'size': self.get_model_size(),
                    'job_status': job_data.get('f_status'),
                    'parent': pipeline.parent,
                    'fate_version': pipeline.fate_version,
                    'runtime_conf_on_party': json_loads(pipeline.runtime_conf_on_party),
                    'parent_info': json_loads(pipeline.parent_info),
                    'inference_dsl': json_loads(pipeline.inference_dsl),
                }
                model_utils.save_model_info(model_info)
                schedule_logger(self.job_id).info(
                    'save {} model info done. model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
            else:
                schedule_logger(self.job_id).info(
                    'save {} model info failed, no job found in db. '
                    'model id: {}, model version: {}.'.format(
                        self.job_id, self.model_id, self.model_version))
        else:
            schedule_logger(self.job_id).info(
                'model {} info already exists in database.'.format(self.job_id))
    except Exception as e:
        schedule_logger(self.job_id).exception(e)
def operate_tag(tag_operation):
    request_data = request.json
    if tag_operation not in [TagOperation.CREATE, TagOperation.RETRIEVE, TagOperation.UPDATE,
                             TagOperation.DESTROY, TagOperation.LIST]:
        raise Exception('The {} operation is not currently supported.'.format(tag_operation))

    tag_name = request_data.get('tag_name')
    tag_desc = request_data.get('tag_desc')
    if tag_operation == TagOperation.CREATE:
        try:
            if not tag_name:
                return get_json_result(100, "Creating tag '{}' failed. Please input a valid tag name.".format(tag_name))
            else:
                Tag.create(f_name=tag_name, f_desc=tag_desc)
        except peewee.IntegrityError:
            raise Exception("'{}' already exists in database.".format(tag_name))
        else:
            return get_json_result(retmsg="'{}' tag has been created successfully.".format(tag_name))

    elif tag_operation == TagOperation.LIST:
        tags = Tag.select()
        limit = request_data.get('limit')
        res = {"tags": []}
        count = len(tags) if limit > len(tags) else limit
        for tag in tags[:count]:
            res['tags'].append({'name': tag.f_name, 'description': tag.f_desc,
                                'model_count': ModelTag.filter(ModelTag.f_t_id == tag.f_id).count()})
        return get_json_result(data=res)

    else:
        if not (tag_operation == TagOperation.RETRIEVE and not request_data.get('with_model')):
            try:
                tag = Tag.get(Tag.f_name == tag_name)
            except peewee.DoesNotExist:
                raise Exception("Cannot find tag '{}'.".format(tag_name))

        if tag_operation == TagOperation.RETRIEVE:
            if request_data.get('with_model', False):
                res = {'models': []}
                models = (MLModel.select()
                          .join(ModelTag, on=ModelTag.f_m_id == MLModel.f_model_version)
                          .where(ModelTag.f_t_id == tag.f_id))
                for model in models:
                    res["models"].append({
                        "model_id": model.f_model_id,
                        "model_version": model.f_model_version,
                        "model_size": model.f_size,
                        "role": model.f_role,
                        "party_id": model.f_party_id
                    })
                res["count"] = models.count()
                return get_json_result(data=res)
            else:
                tags = Tag.filter(Tag.f_name.contains(tag_name))
                if not tags:
                    return get_json_result(100, retmsg="No tags found.")
                res = {'tags': []}
                for tag in tags:
                    res['tags'].append({'name': tag.f_name, 'description': tag.f_desc})
                return get_json_result(data=res)

        elif tag_operation == TagOperation.UPDATE:
            new_tag_name = request_data.get('new_tag_name', None)
            new_tag_desc = request_data.get('new_tag_desc', None)
            if (tag.f_name == new_tag_name) and (tag.f_desc == new_tag_desc):
                return get_json_result(100, "Nothing to be updated.")
            else:
                if request_data.get('new_tag_name'):
                    if not Tag.get_or_none(Tag.f_name == new_tag_name):
                        tag.f_name = new_tag_name
                    else:
                        return get_json_result(100, retmsg="'{}' tag already exists.".format(new_tag_name))
                tag.f_desc = new_tag_desc
                tag.save()
                return get_json_result(retmsg="Information of '{}' tag has been updated successfully.".format(tag_name))

        else:
            delete_query = ModelTag.delete().where(ModelTag.f_t_id == tag.f_id)
            delete_query.execute()
            Tag.delete_instance(tag)
            return get_json_result(retmsg="'{}' tag has been deleted successfully.".format(tag_name))
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)

                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if the party id is valid.".format(
                        request_config["party_id"]))
                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(
                            MLModel.f_job_id == job_parameters.get("model_version"),
                            MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        model_info['role'] = request_config["model_id"].split('#')[0]
                        model_info['party_id'] = request_config["model_id"].split('#')[1]
                        if model_utils.compare_version(model_info['f_fate_version'], '1.5.1') == 'lt':
                            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
                            model_info['initiator_role'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                            model_info['initiator_party_id'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                            model_info['work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = False if model_info.get('f_inference_dsl') else True
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(f'job id: {job_parameters.get("model_version")}, '
                                         f'role: {request_config["role"]} model info already exists in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        submit_result = DAGScheduler.submit({'job_dsl': job_dsl,
                                             'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)
def migration(config_data: dict):
    try:
        party_model_id = model_utils.gen_party_model_id(model_id=config_data["model_id"],
                                                        role=config_data["local"]["role"],
                                                        party_id=config_data["local"]["party_id"])
        model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                               model_version=config_data["model_version"])
        if not model.exists():
            raise Exception("Cannot find {} {} model local cache".format(
                config_data["model_id"], config_data["model_version"]))
        with DB.connection_context():
            if MLModel.get_or_none(MLModel.f_model_version == config_data["unify_model_version"]):
                raise Exception("Unify model version {} has been occupied in database. "
                                "Please choose another unify model version and try again.".format(
                                    config_data["unify_model_version"]))

        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Cannot find pipeline file in model.")

        migrate_model = pipelined_model.PipelinedModel(
            model_id=model_utils.gen_party_model_id(
                model_id=model_utils.gen_model_id(config_data["migrate_role"]),
                role=config_data["local"]["role"],
                party_id=config_data["local"]["migrate_party_id"]),
            model_version=config_data["unify_model_version"])

        # migrate_model.create_pipelined_model()
        shutil.copytree(src=model.model_path, dst=migrate_model.model_path)

        pipeline = migrate_model.read_component_model('pipeline', 'pipeline')['Pipeline']

        # utilize Pipeline model to collect model data and modify related inner information of the model
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        train_runtime_conf["role"] = config_data["migrate_role"]
        train_runtime_conf["initiator"] = config_data["migrate_initiator"]

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_id=model_utils.gen_model_id(train_runtime_conf["role"]),
            model_version=migrate_model.model_version)

        # update pipeline.pb file
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)
        pipeline.model_id = bytes(adapter.get_common_parameters().to_dict().get("model_id"), "utf-8")
        pipeline.model_version = bytes(adapter.get_common_parameters().to_dict().get("model_version"), "utf-8")

        # save updated pipeline.pb file
        migrate_model.save_pipeline(pipeline)
        shutil.copyfile(os.path.join(migrate_model.model_path, "pipeline.pb"),
                        os.path.join(migrate_model.model_path, "variables", "data",
                                     "pipeline", "pipeline", "Pipeline"))

        # modify proto
        with open(os.path.join(migrate_model.model_path, 'define', 'define_meta.yaml'), 'r') as fin:
            define_yaml = yaml.safe_load(fin)

        for key, value in define_yaml['model_proto'].items():
            if key == 'pipeline':
                continue
            for v in value.keys():
                buffer_obj = migrate_model.read_component_model(key, v)
                module_name = define_yaml['component_define'].get(key, {}).get('module_name')
                modified_buffer = model_migration(
                    model_contents=buffer_obj,
                    module_name=module_name,
                    old_guest_list=config_data['role']['guest'],
                    new_guest_list=config_data['migrate_role']['guest'],
                    old_host_list=config_data['role']['host'],
                    new_host_list=config_data['migrate_role']['host'],
                    old_arbiter_list=config_data.get('role', {}).get('arbiter', None),
                    new_arbiter_list=config_data.get('migrate_role', {}).get('arbiter', None))
                migrate_model.save_component_model(component_name=key,
                                                   component_module_name=module_name,
                                                   model_alias=v,
                                                   model_buffers=modified_buffer)

        archive_path = migrate_model.packaging_model()
        shutil.rmtree(os.path.abspath(migrate_model.model_path))

        return (0, "Migrating model successfully. "
                   "The configuration of model has been modified automatically. "
                   "New model id is: {}, model version is: {}. "
                   "Model files can be found at '{}'.".format(
                       adapter.get_common_parameters().to_dict().get("model_id"),
                       migrate_model.model_version,
                       os.path.abspath(archive_path)),
                {"model_id": migrate_model.model_id,
                 "model_version": migrate_model.model_version,
                 "path": os.path.abspath(archive_path)})
    except Exception as e:
        return 100, str(e), {}
def load_model():
    request_config = request.json
    if request_config.get('job_id', None):
        with DB.connection_context():
            model = MLModel.get_or_none(
                MLModel.f_job_id == request_config.get("job_id"),
                MLModel.f_role == 'guest')
        if model:
            model_info = model.to_json()
            request_config['initiator'] = {}
            request_config['initiator']['party_id'] = str(model_info.get('f_initiator_party_id'))
            request_config['initiator']['role'] = model_info.get('f_initiator_role')
            request_config['job_parameters'] = model_info.get('f_runtime_conf').get('job_parameters')
            request_config['role'] = model_info.get('f_runtime_conf').get('role')
            for key, value in request_config['role'].items():
                for i, v in enumerate(value):
                    value[i] = str(v)
            request_config.pop('job_id')
        else:
            return get_json_result(retcode=101,
                                   retmsg="Model with version {} cannot be found in database. "
                                          "Please check if the model version is valid.".format(
                                              request_config.get('job_id')))
    _job_id = job_utils.generate_job_id()
    initiator_party_id = request_config['initiator']['party_id']
    initiator_role = request_config['initiator']['role']
    publish_model.generate_publish_model_info(request_config)
    load_status = True
    load_status_info = {}
    load_status_msg = 'success'
    load_status_info['detail'] = {}
    if "federated_mode" not in request_config['job_parameters']:
        if request_config["job_parameters"]["work_mode"] == WorkMode.STANDALONE:
            request_config['job_parameters']["federated_mode"] = FederatedMode.SINGLE
        elif request_config["job_parameters"]["work_mode"] == WorkMode.CLUSTER:
            request_config['job_parameters']["federated_mode"] = FederatedMode.MULTIPLE
    for role_name, role_partys in request_config.get("role").items():
        if role_name == 'arbiter':
            continue
        load_status_info[role_name] = load_status_info.get(role_name, {})
        load_status_info['detail'][role_name] = {}
        for _party_id in role_partys:
            request_config['local'] = {'role': role_name, 'party_id': _party_id}
            try:
                response = federated_api(job_id=_job_id,
                                         method='POST',
                                         endpoint='/model/load/do',
                                         src_party_id=initiator_party_id,
                                         dest_party_id=_party_id,
                                         src_role=initiator_role,
                                         json_body=request_config,
                                         federated_mode=request_config['job_parameters']['federated_mode'])
                load_status_info[role_name][_party_id] = response['retcode']
                detail = {_party_id: {}}
                detail[_party_id]['retcode'] = response['retcode']
                detail[_party_id]['retmsg'] = response['retmsg']
                load_status_info['detail'][role_name].update(detail)
                if response['retcode']:
                    load_status = False
                    load_status_msg = 'failed'
            except Exception as e:
                stat_logger.exception(e)
                load_status = False
                load_status_msg = 'failed'
                load_status_info[role_name][_party_id] = 100
    return get_json_result(job_id=_job_id,
                           retcode=(0 if load_status else 101),
                           retmsg=load_status_msg,
                           data=load_status_info)
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)

                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if the party id is valid.".format(
                        request_config["party_id"]))
                try:
                    with DB.connection_context():
                        model = MLModel.get_or_none(
                            MLModel.f_job_id == train_runtime_conf["job_parameters"]["model_version"],
                            MLModel.f_role == request_config["role"])
                        if not model:
                            MLModel.create(
                                f_role=request_config["role"],
                                f_party_id=request_config["party_id"],
                                f_roles=train_runtime_conf["role"],
                                f_job_id=train_runtime_conf["job_parameters"]["model_version"],
                                f_model_id=train_runtime_conf["job_parameters"]["model_id"],
                                f_model_version=train_runtime_conf["job_parameters"]["model_version"],
                                f_initiator_role=train_runtime_conf["initiator"]["role"],
                                f_initiator_party_id=train_runtime_conf["initiator"]["party_id"],
                                f_runtime_conf=train_runtime_conf,
                                f_work_mode=train_runtime_conf["job_parameters"]["work_mode"],
                                f_dsl=json_loads(pipeline.train_dsl),
                                f_imported=1,
                                f_job_status='complete')
                        else:
                            stat_logger.info(f'job id: {train_runtime_conf["job_parameters"]["model_version"]}, '
                                             f'role: {request_config["role"]} model info already exists in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = DAGScheduler.submit(
            {'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update({'job_dsl_path': job_dsl_path,
                     'job_runtime_conf_path': job_runtime_conf_path,
                     'board_url': board_url,
                     'logs_directory': logs_directory})
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)