def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          output_dir, crf=True, remove_O=False):
    """Train a BiLSTM(-CRF) tagger, save it under ``output_dir`` and evaluate it.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for the evaluation.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        output_dir: Directory in which ``<model_name>.pkl`` is written.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Forwarded to ``Metrics``.

    Returns:
        The predicted tag lists returned by ``BILSTM_Model.test``.
    """
    train_words, train_tags = train_data
    dev_words, dev_tags = dev_data
    test_words, test_tags = test_data

    started_at = time.time()
    model = BILSTM_Model(len(word2id), len(tag2id), crf=crf)
    model.train(train_words, train_tags, dev_words, dev_tags, word2id, tag2id)

    if crf:
        model_name = "bilstm_crf"
    else:
        model_name = "bilstm"
    checkpoint_path = os.path.join(output_dir, model_name + ".pkl")
    save_model(model, checkpoint_path)

    elapsed_seconds = int(time.time() - started_at)
    print("训练完毕,共用时{}秒.".format(elapsed_seconds))
    print("评估{}模型中...".format(model_name))

    # ``test`` hands back its own gold tag lists; those are the ones scored.
    pred_tag_lists, gold_tag_lists = model.test(test_words, test_tags,
                                                word2id, tag2id)
    report = Metrics(gold_tag_lists, pred_tag_lists, remove_O=remove_O)
    report.report_scores()
    report.report_confusion_matrix()
    return pred_tag_lists
def bilstm_train_and_eval(train_data, dev_data, test_data, charEmbedding,
                          word2id, tag2id, crf=True, remove_O=False):
    """Train a BiLSTM(-CRF) tagger, checkpoint it and dump its test predictions.

    The checkpoint and the prediction file are both named after the first
    five characters of ``str(bilstm_model.best_val_loss)``.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for prediction.
        charEmbedding: Forwarded to ``BILSTM_Model`` as its second argument.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Accepted for signature compatibility; not used here.

    Returns:
        None. Predictions are written, one space-joined tag sequence per
        line, to ``next_dev_result/<loss>_bilstmcrf_result.txt``.
    """
    train_word_lists, train_tag_lists = train_data
    dev_word_lists, dev_tag_lists = dev_data
    test_word_lists, test_tag_lists = test_data

    start = time.time()
    vocab_size = len(word2id)
    out_size = len(tag2id)
    bilstm_model = BILSTM_Model(vocab_size, charEmbedding, out_size, crf=crf)
    bilstm_model.train(train_word_lists, train_tag_lists,
                       dev_word_lists, dev_tag_lists, word2id, tag2id)

    model_name = "bilstm_crf" if crf else "bilstm"
    save_model(
        bilstm_model,
        "./ckpts/" + str(bilstm_model.best_val_loss)[:5] + model_name + ".pkl")

    print("训练完毕,共用时{}秒.".format(int(time.time() - start)))
    print("评估{}模型中...".format(model_name))

    pred_tag_lists, test_tag_lists = bilstm_model.test(
        test_word_lists, test_tag_lists, word2id, tag2id)

    result_path = ('next_dev_result/' + str(bilstm_model.best_val_loss)[:5]
                   + '_bilstmcrf_result.txt')
    # The context manager closes the file even if a write fails part-way.
    with open(result_path, 'w') as result_file:
        result_file.writelines(
            ' '.join(pred_tag_list) + '\n' for pred_tag_list in pred_tag_lists)
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          crf=True):
    """Train a BiLSTM(-CRF) tagger, save it to ``./ckpts`` and predict test tags.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for prediction.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).

    Returns:
        The predicted tag lists returned by ``BILSTM_Model.test``.
    """
    train_words, train_tags = train_data
    dev_words, dev_tags = dev_data
    test_words, test_tags = test_data

    started_at = time.time()
    model = BILSTM_Model(len(word2id), len(tag2id), crf=crf)
    model.train(train_words, train_tags, dev_words, dev_tags, word2id, tag2id)

    if crf:
        model_name = "bilstm_crf"
    else:
        model_name = "bilstm"
    save_model(model, "./ckpts/" + model_name + ".pkl")

    elapsed_seconds = int(time.time() - started_at)
    print("训练完毕,共用时{}秒.".format(elapsed_seconds))
    print("评估{}模型中...".format(model_name))

    pred_tag_lists, gold_tag_lists = model.test(test_words, test_tags,
                                                word2id, tag2id)
    # NOTE(review): the Metrics object is built but nothing is reported from
    # it; kept in case its constructor has side effects -- confirm.
    metrics = Metrics(gold_tag_lists, pred_tag_lists)
    return pred_tag_lists
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          crf=True, remove_O=False):
    """Train a BiLSTM(-CRF) tagger, save it to ``./ckpts`` and report metrics.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for the evaluation.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Forwarded to ``Metrics``.

    Returns:
        The predicted tag lists returned by ``BILSTM_Model.test``.
    """
    train_words, train_tags = train_data
    dev_words, dev_tags = dev_data
    test_words, test_tags = test_data

    started_at = time.time()
    model = BILSTM_Model(len(word2id), len(tag2id), crf=crf)
    model.train(train_words, train_tags, dev_words, dev_tags, word2id, tag2id)

    if crf:
        model_name = "bilstm_crf"
    else:
        model_name = "bilstm"
    save_model(model, "./ckpts/" + model_name + ".pkl")

    elapsed_seconds = int(time.time() - started_at)
    print("Training completed, {} seconds when sharing.".format(elapsed_seconds))
    print("Evaluation{} model:...".format(model_name))

    # ``test`` hands back its own gold tag lists; those are the ones scored.
    pred_tag_lists, gold_tag_lists = model.test(test_words, test_tags,
                                                word2id, tag2id)
    report = Metrics(gold_tag_lists, pred_tag_lists, remove_O=remove_O)
    report.report_scores()
    report.report_confusion_matrix()
    return pred_tag_lists
def bilstm_pred(train_word_lists, train_tag_lists, dev_word_lists,
                dev_tag_lists, test_word_lists, test_tag_lists):
    """Train a plain BiLSTM tagger (no CRF) and return its test predictions.

    The id mappings are not parameters: the function reads the module-level
    ``bilstm_word2id`` and ``bilstm_tag2id``.

    Args:
        train_word_lists: Training word sequences.
        train_tag_lists: Training tag sequences.
        dev_word_lists: Development word sequences passed to ``train``.
        dev_tag_lists: Development tag sequences passed to ``train``.
        test_word_lists: Word sequences to predict on.
        test_tag_lists: Tag sequences passed to ``test`` alongside the words.

    Returns:
        The predicted tag lists returned by ``BILSTM_Model.test``.
    """
    start = time.time()
    vocab_size = len(bilstm_word2id)
    out_size = len(bilstm_tag2id)

    model = BILSTM_Model(vocab_size, out_size, crf=False)
    model.train(train_word_lists, train_tag_lists,
                dev_word_lists, dev_tag_lists,
                bilstm_word2id, bilstm_tag2id)
    save_model(model, "./ckpts/lstm.pkl")
    print("训练完毕,共用时{}秒.".format(int(time.time() - start)))

    # Test with the same id mappings the model was sized and trained with;
    # the previous code passed the unrelated names ``word2id``/``tag2id``.
    pred_tag_lists, test_tag_lists = model.test(
        test_word_lists, test_tag_lists, bilstm_word2id, bilstm_tag2id)
    return pred_tag_lists
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          crf=True, remove_O=False, reload_model=False):
    """Train (or reload) a BiLSTM(-CRF) tagger and evaluate it on the test set.

    The model file lives at
    ``./weights/<model_name>_<emb_size>_<hidden_size>.pkl``, where the two
    sizes are read from ``LSTMConfig``.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for the evaluation.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Forwarded to ``results_print``.
        reload_model: When truthy, load the model file with ``load_model``
            instead of training and saving a new model.

    Returns:
        The predicted tag lists returned by the model's ``test`` method.
    """
    # data
    train_word_lists, train_tag_lists = train_data
    dev_word_lists, dev_tag_lists = dev_data
    test_word_lists, test_tag_lists = test_data

    # get model_file
    model_name = "bilstm_crf" if crf else "bilstm"
    emb_size = LSTMConfig.emb_size
    hidden_size = LSTMConfig.hidden_size
    model_file = ("./weights/" + model_name + '_' + str(emb_size) + '_'
                  + str(hidden_size) + ".pkl")

    if reload_model:
        # Reload a previously trained model; nothing is trained, so no
        # training-time message is printed on this path.
        bilstm_model = load_model(model_file)
    else:
        # Train and save the model.
        start = time.time()
        vocab_size = len(word2id)
        out_size = len(tag2id)
        bilstm_model = BILSTM_Model(vocab_size, out_size, crf=crf)
        bilstm_model.train(train_word_lists, train_tag_lists,
                           dev_word_lists, dev_tag_lists, word2id, tag2id)
        # TODO: reconsider when to save -- e.g. after validation each epoch?
        save_model(bilstm_model, model_file)
        print("Training finished, taken {} seconds!".format(
            int(time.time() - start)))

    print("Evaluating {} model:".format(model_name))
    pred_tag_lists, test_tag_lists = bilstm_model.test(
        test_word_lists, test_tag_lists, word2id, tag2id)
    results_print(test_tag_lists, pred_tag_lists, remove_O=remove_O)
    return pred_tag_lists
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, data2id,
                          crf=True, remove_O=False):
    """Train a BiLSTM(-CRF) model, save it and return its element-wise accuracy.

    Each of ``train_data``, ``dev_data`` and ``test_data`` is unpacked into
    five lists: words, data, word labels, data labels and data pointers.

    Args:
        train_data: Five-list tuple used for training.
        dev_data: Five-list tuple handed to ``train`` as development data.
        test_data: Five-list tuple used for the evaluation.
        word2id: Mapping whose length is used as the vocabulary size.
        data2id: Mapping whose length is passed as the model's second size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Accepted for signature compatibility; not used here.

    Returns:
        The fraction of compared positions where prediction equals gold, or
        ``0.0`` when there is nothing to compare.
    """
    (train_word_lists, train_data_lists, train_wordlabel_lists,
     train_datalabel_lists, train_dataptr_lists) = train_data
    (dev_word_lists, dev_data_lists, dev_wordlabel_lists,
     dev_datalabel_lists, dev_dataptr_lists) = dev_data
    (test_word_lists, test_data_lists, test_wordlabel_lists,
     test_datalabel_lists, test_dataptr_lists) = test_data

    start = time.time()
    vocab_size = len(word2id)
    data_size = len(data2id)
    bilstm_model = BILSTM_Model(vocab_size, data_size, crf=crf)
    bilstm_model.train(
        train_word_lists, train_data_lists, train_wordlabel_lists,
        train_datalabel_lists, train_dataptr_lists,
        dev_word_lists, dev_data_lists, dev_wordlabel_lists,
        dev_datalabel_lists, dev_dataptr_lists,
        word2id, data2id)

    model_name = "bilstm_crf" if crf else "bilstm"
    save_model(bilstm_model, "./ckpts/" + model_name + ".pkl")
    print("训练完毕,共用时{}秒.".format(int(time.time() - start)))
    print("评估{}模型中...".format(model_name))

    # Keep BOTH return values. The previous code bound them to the same name,
    # so predictions were compared with themselves and accuracy was always 1.
    # NOTE(review): assumes ``test`` returns (predictions, gold) in that
    # order -- confirm against BILSTM_Model.test.
    pred_tag_lists, test_tag_lists = bilstm_model.test(
        test_word_lists, test_data_lists, test_wordlabel_lists,
        test_datalabel_lists, test_dataptr_lists, word2id, data2id)

    allnum = 0
    correct = 0
    for pred, gold in zip(pred_tag_lists, test_tag_lists):
        # zip stops at the shorter sequence, so predictions running past the
        # gold length are ignored.
        for predicted, expected in zip(pred, gold):
            if predicted == expected:
                correct += 1
            allnum += 1

    # Avoid ZeroDivisionError when the test set yields no comparable items.
    accuracy = correct / allnum if allnum else 0.0
    print(accuracy)
    return accuracy
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          crf=True, remove_O=False):
    """Train a BiLSTM(-CRF) tagger, log gold/predicted tags and report metrics.

    Args:
        train_data: ``(word_lists, tag_lists)`` pair used for training.
        dev_data: ``(word_lists, tag_lists)`` pair handed to ``train`` as
            development data.
        test_data: ``(word_lists, tag_lists)`` pair used for the evaluation.
        word2id: Mapping whose length is used as the vocabulary size.
        tag2id: Mapping whose length is used as the output size.
        crf: Forwarded to ``BILSTM_Model``; also selects the model name
            (``"bilstm_crf"`` when truthy, ``"bilstm"`` otherwise).
        remove_O: Forwarded to ``Metrics``.

    Returns:
        The predicted tag lists returned by ``BILSTM_Model.test``.
    """
    train_word_lists, train_tag_lists = train_data
    dev_word_lists, dev_tag_lists = dev_data
    test_word_lists, test_tag_lists = test_data

    start = time.time()
    vocab_size = len(word2id)
    out_size = len(tag2id)
    bilstm_model = BILSTM_Model(vocab_size, out_size, crf=crf)
    bilstm_model.train(train_word_lists, train_tag_lists,
                       dev_word_lists, dev_tag_lists, word2id, tag2id)

    model_name = "bilstm_crf" if crf else "bilstm"
    save_model(bilstm_model, "./ckpts/" + model_name + ".pkl")
    print("训练完毕,共用时{}秒.".format(int(time.time() - start)))
    print("评估{}模型中...".format(model_name))

    pred_tag_lists, test_tag_lists = bilstm_model.test(
        test_word_lists, test_tag_lists, word2id, tag2id)

    def as_text(tags):
        # Entries may be ready-made strings or sequences of tag strings;
        # concatenating a list with " " would raise TypeError.
        return tags if isinstance(tags, str) else " ".join(tags)

    # Append one line per test item: gold tags, a space, predicted tags.
    # The previous code wrote the prediction twice and never the gold tags.
    # NOTE(review): gold-before-predicted order is assumed -- confirm with
    # whatever consumes result.txt.
    with open("./result.txt", "a+") as f:
        f.writelines(
            as_text(gold) + " " + as_text(pred) + "\n"
            for gold, pred in zip(test_tag_lists, pred_tag_lists))

    metrics = Metrics(test_tag_lists, pred_tag_lists, remove_O=remove_O)
    metrics.report_scores()
    metrics.report_confusion_matrix()
    return pred_tag_lists