def __get_params(self):
    """Load inference parameters from config/infer.json into ``self._params``."""
    config_path = os.path.join(_get_abs_path("config"), 'infer.json')
    raw_params = from_file(config_path)
    self._params = replace_none(raw_params)
def init_data_params(self):
    """Read infer_data_params.json beside the inference model and set ``self.input_keys``.

    :return: None
    """
    data_params_path = self.param["inference_model_path"] + "/infer_data_params.json"
    raw_dict = params.from_file(data_params_path)
    # "fields" lists the input field names the exported model expects
    self.input_keys = params.replace_none(raw_dict).get("fields")
def train(self, json_path):
    """Retrain a model from a JSON training configuration.

    :param json_path: path to the JSON file describing reader, model and trainer
    """
    _params = params.replace_none(params.from_file(json_path))
    register.import_modules()

    # build the three collaborating pieces from their config sections
    reader = dataset_reader_from_params(_params.get("dataset_reader"))
    model, num_train_examples = model_from_params(_params.get("model"))
    trainer = build_trainer(_params.get("trainer"), reader, model,
                            num_train_examples)

    trainer.train_and_eval()
    logging.info("end of run train and eval .....")
def init_model(self, model_class="ernie_1.0_skep_large_ch", task="sentiment_classify", use_cuda=False):
    """Initialise the inference model, tokenizer and label map.

    :param model_class: key into ``self._params["model_name"]`` selecting the pretrained model
    :param task: key into ``self._params["task_name"]`` selecting the task head
    :param use_cuda: whether the inference engine should run on GPU
    """
    ptm_id = self._params.get("model_name").get(model_class).get('type')
    task_id = self._params.get("task_name").get(task)
    model_dict = self._params.get("model_class").get(ptm_id + task_id)

    # step 1: fetch the model data if it is not already present locally
    data_url = model_dict.get("model_file_http_url")
    md5_url = model_dict.get("model_md5_http_url")
    download_data(data_url, md5_url)

    # step 2: resolve the registered model class and instantiate it
    register.import_modules()
    model_name = model_dict.get("type")
    self.model_class = RegisterSet.models[model_name](model_dict)

    # step 3: read the input field layout expected by the exported model
    model_path = _get_abs_path(model_dict.get("inference_model_path"))
    data_params_path = model_path + "/infer_data_params.json"
    param_dict = replace_none(from_file(data_params_path))
    self.input_keys = param_dict.get("fields")

    # step 4: load the inference engine
    self.inference = self.__load_inference_model(model_path, use_cuda)

    # step 5: build the tokenizer described by the predict_reader config
    tokenizer_info = model_dict.get("predict_reader").get('tokenizer')
    tokenizer_name = tokenizer_info.get('type')
    tokenizer_vocab_path = _get_abs_path(tokenizer_info.get('vocab_path'))
    tokenizer_params = None
    if "params" in tokenizer_info:
        tokenizer_params = tokenizer_info.get("params")
        # BPE vocab/json paths in the config are relative; make them absolute
        tokenizer_params["bpe_vocab_file"] = _get_abs_path(tokenizer_params["bpe_vocab_file"])
        tokenizer_params["bpe_json_file"] = _get_abs_path(tokenizer_params["bpe_json_file"])
    tokenizer_class = RegisterSet.tokenizer[tokenizer_name]
    self.tokenizer = tokenizer_class(vocab_file=tokenizer_vocab_path,
                                     split_char=" ",
                                     unk_token="[UNK]",
                                     params=tokenizer_params)
    self.max_seq_len = 512
    self.truncation_type = 0
    # GptBpeTokenizer vocabularies use id 1 for padding; others use 0
    self.padding_id = 1 if tokenizer_name == "GptBpeTokenizer" else 0
    self.inference_type = model_dict.get("inference_type", None)

    # step 6: optional label map, tab-separated "label\tindex" lines
    label_map_file = model_dict.get("label_map_path", None)
    self.label_map = {}
    if isinstance(label_map_file, str):
        label_map_file = _get_abs_path(label_map_file)
        with open(label_map_file, 'r') as fr:
            # iterate the handle directly instead of materialising readlines()
            for line in fr:
                items = line.strip('\r\n').split('\t')
                idx, label = int(items[1]), items[0]
                self.label_map[idx] = label
def build_inference(params_dict, dataset_reader, model):
    """Construct an Inference object from its configuration and parts."""
    return Inference(param=params_dict,
                     data_set_reader=dataset_reader,
                     model_class=model)


if __name__ == "__main__":
    args = args.build_common_arguments()
    log.init_log("./log/infer", level=logging.DEBUG)

    # prediction for the classification task
    _params = params.replace_none(params.from_file(args.param_path))
    register.import_modules()

    dataset_reader = dataset_reader_from_params(_params.get("dataset_reader"))
    model = model_from_params(_params.get("model"))
    inference = build_inference(_params.get("inference"), dataset_reader, model)

    inference.do_inference()
    logging.info("os exit.")