def HMM_test(if_train=True):
    """Train or load the HMM tagger, tag the test split and report scores.

    Two score tables are produced from the predictions: one from
    ``evaluate_entity_label`` (written to ``ResultPathConfig.hmm_entity``)
    and one from ``evaluate_single_label`` (written to
    ``ResultPathConfig.hmm_model``). Both are also printed with ``tabulate``.

    Args:
        if_train: When truthy, a model is always trained and saved. When
            falsy, the checkpoint at ``ModelPathConfig.hmm`` is loaded if it
            exists; if it does not exist, training happens anyway.
    """
    checkpoint_found = os.path.exists(ModelPathConfig.hmm)

    print("upload data!")
    # A previously disabled variant of these calls passed
    # data_dir=os.path.join(os.getcwd(), "data", "ResumeNER") to build_corpus.
    word_lists, tag_lists, word2id, tag2id = build_corpus("train")
    test_word_lists, test_tag_lists, _, test_split_last = build_corpus("test")

    # The model object is always constructed first, even if it is replaced
    # by a loaded checkpoint right afterwards.
    hmm_model = HMM(len(tag2id), len(word2id))
    if checkpoint_found and not if_train:
        print("load model")
        hmm_model = load_model(ModelPathConfig.hmm)
    else:
        print("start to training")
        hmm_model.train(word_lists, tag_lists, word2id, tag2id)
        print("save the model")
        save_model(hmm_model, ModelPathConfig.hmm)

    # NOTE(review): the second argument is the fourth value returned by
    # build_corpus("test") (the original code passed the `_` placeholder).
    # Confirm against HMM.test's signature that this is intended.
    pred_tag_lists = hmm_model.test(
        test_word_lists, test_split_last, word2id, tag2id
    )
    label_tag_lists = test_tag_lists

    def _export(units, csv_path):
        # Convert the score units to a table, persist it and echo it.
        frame = unitstopd(units)
        frame.to_csv(csv_path)
        print(tabulate(frame, headers='keys', tablefmt='psql'))

    _export(
        evaluate_entity_label(
            pred_tag_lists, label_tag_lists, list(tag2id.keys())
        ),
        ResultPathConfig.hmm_entity,
    )
    _export(
        evaluate_single_label(
            pred_tag_lists, label_tag_lists, list(tag2id.keys())
        ),
        ResultPathConfig.hmm_model,
    )
def hmm_pred(train_word_lists, train_tag_lists, test_word_lists,
             test_tag_lists, word2id, tag2id):
    """Train an HMM on the training lists and return its test predictions.

    The trained model is handed to ``save_model`` with the path
    ``./ckpts/hmm.pkl`` before prediction.

    Args:
        train_word_lists: Training inputs forwarded to ``HMM.train``.
        train_tag_lists: Training tags forwarded to ``HMM.train``.
        test_word_lists: Inputs forwarded to ``HMM.test``.
        test_tag_lists: Accepted for interface compatibility; not used here.
        word2id: Mapping whose length sizes the model; also forwarded to
            ``train`` and ``test``.
        tag2id: Mapping whose length sizes the model; also forwarded to
            ``train`` and ``test``.

    Returns:
        Whatever ``HMM.test`` returns for ``test_word_lists``.
    """
    num_tags = len(tag2id)
    vocab_size = len(word2id)

    tagger = HMM(num_tags, vocab_size)
    tagger.train(train_word_lists, train_tag_lists, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    return tagger.test(test_word_lists, word2id, tag2id)
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Train an HMM, save it, predict on the test data and print results.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        test_data: Pair ``(word_lists, tag_lists)`` used for evaluation.
        word2id: Mapping whose length sizes the model; forwarded to the model.
        tag2id: Mapping whose length sizes the model; forwarded to the model.
        remove_O: Forwarded unchanged to ``results_print``.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    # Unpack both splits up front.
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Fit the model and persist it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Predict and print the evaluation.
    predictions = tagger.test(test_words, word2id, tag2id)
    results_print(gold_tags, predictions, remove_O=remove_O)
    return predictions
def hmm_train_eval(train_data, test_data, word2id, tag2id):
    """Train an HMM model, save it and predict tags for the test data.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        test_data: Pair ``(word_lists, tag_lists)`` used for evaluation.
        word2id: Mapping whose length sizes the model; forwarded to the model.
        tag2id: Mapping whose length sizes the model; forwarded to the model.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Fit the HMM and persist it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Predict on the test words.
    predictions = tagger.test(test_words, word2id, tag2id)

    # A Metrics object is built from gold and predicted tags, but nothing is
    # reported from it in this function.
    _metrics = Metrics(gold_tags, predictions)
    return predictions
def hmm_train_eval(train_data, test_data, word2id, tag2id):
    """Train an HMM model, save it, and print its accuracy on the test data.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        test_data: Pair ``(word_lists, tag_lists)`` used for evaluation.
        word2id: Mapping whose length sizes the model; forwarded to the model.
        tag2id: Mapping whose length sizes the model; forwarded to the model.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Fit the HMM and persist it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Predict, then print the value returned by evaluate() as a percentage.
    predictions = tagger.test(test_words, word2id, tag2id)
    accuracy = evaluate(predictions, gold_tags)
    percent = accuracy * 100
    print("HMM 模型的准确率为:{:.2f}%".format(percent))
    return predictions
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Train an HMM model, save it, and report metrics on the test data.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        test_data: Pair ``(word_lists, tag_lists)`` used for evaluation.
        word2id: Mapping whose length sizes the model; forwarded to the model.
        tag2id: Mapping whose length sizes the model; forwarded to the model.
        remove_O: Forwarded unchanged to the ``Metrics`` constructor.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Fit the HMM and persist it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Predict, then report scores followed by the confusion matrix.
    predictions = tagger.test(test_words, word2id, tag2id)
    report = Metrics(gold_tags, predictions, remove_O=remove_O)
    report.report_scores()
    report.report_confusion_matrix()
    return predictions
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Fit an HMM tagger, checkpoint it and report test-set metrics.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        test_data: Pair ``(word_lists, tag_lists)`` used for evaluation.
        word2id: Mapping whose length sizes the model; forwarded to the model.
        tag2id: Mapping whose length sizes the model; forwarded to the model.
        remove_O: Forwarded unchanged to the ``Metrics`` constructor.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    fit_words, fit_tags = train_data
    eval_words, eval_tags = test_data

    # Training: build the model, fit it, then hand it to save_model.
    num_tags, vocab_size = len(tag2id), len(word2id)
    hmm = HMM(num_tags, vocab_size)
    hmm.train(fit_words, fit_tags, word2id, tag2id)
    save_model(hmm, "./ckpts/hmm.pkl")

    # Evaluation: scores are reported first, then the confusion matrix.
    pred_tag_lists = hmm.test(eval_words, word2id, tag2id)
    scorer = Metrics(eval_tags, pred_tag_lists, remove_O=remove_O)
    scorer.report_scores()
    scorer.report_confusion_matrix()

    return pred_tag_lists
def train(train_data, val_data, fold_idx=None):
    """Fit an HMM on the training data and print train/validation scores.

    Args:
        train_data: Raw training data wrapped in ``MyDataset``.
        val_data: Raw validation data wrapped in ``MyDataset``.
        fold_idx: Optional zero-based fold index. When given, the start
            message shows ``fold_idx + 1`` and the checkpoint file name
            carries a ``_fold{fold_idx}`` suffix.
    """
    train_dataset = MyDataset(train_data)
    val_dataset = MyDataset(val_data)

    # NOTE(review): both loaders are created but never read in this function.
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size)

    from models.hmm import HMM

    word2id, id2word = load_vocab()
    model = HMM(len(config.label2id), len(word2id))

    if fold_idx is not None:
        print('start fold: {}'.format(fold_idx + 1))
        checkpoint_name = '{}_fold{}.bin'.format(model_name, fold_idx)
    else:
        print('start')
        checkpoint_name = '{}.bin'.format(model_name)
    # NOTE(review): the path is computed but nothing is written to it here.
    model_save_path = os.path.join(config.model_path, checkpoint_name)

    model.train(train_dataset.x_data, train_dataset.y_data)

    def _score(dataset):
        # Predict on the dataset inputs and score against its labels.
        predicted = model.predict(dataset.x_data)
        return get_score(dataset.y_data, predicted)

    train_score = _score(train_dataset)
    val_score = _score(val_dataset)

    msg = 'train score: {0:>6.2%}, val score: {1:>6.2%}'
    print(msg.format(train_score, val_score))
#!/usr/bin/env python3
"""Fit the Simple, HMM and Complex models and print a report for each."""
from pos_data import read
from models.simple import Simple
from models.hmm import HMM
from models.complex import Complex
from metrics import print_report

X_train, y_train = read('data/bc.train', 'train')
# NOTE(review): the validation file is also read with the 'train' mode
# argument; confirm against pos_data.read that this is intended.
X_val, y_val = read('data/bc.val', 'train')


def _fit_and_report(title, model_cls):
    """Announce, build, fit and evaluate one model on the validation data.

    Returns the fitted model together with its validation predictions.
    """
    print(title)
    model = model_cls()
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)
    print_report(y_val, predictions)
    return model, predictions


# Each run rebinds y_pred, so it ends up holding the last model's output.
sm, y_pred = _fit_and_report('Simple model', Simple)
hm, y_pred = _fit_and_report('HMM', HMM)
cp, y_pred = _fit_and_report('Complex model', Complex)
def hmm_train(train_data, word2id, tag2id):
    """Train an HMM on the given data and save it to ``HMM_MODEL_PATH``.

    Args:
        train_data: Pair ``(word_lists, tag_lists)`` used for training.
        word2id: Mapping whose length sizes the model; forwarded to ``train``.
        tag2id: Mapping whose length sizes the model; forwarded to ``train``.
    """
    words, tags = train_data

    num_tags = len(tag2id)
    vocab_size = len(word2id)
    tagger = HMM(num_tags, vocab_size)

    tagger.train(words, tags, word2id, tag2id)
    save_model(tagger, HMM_MODEL_PATH)