def train_and_test(model, optimizer, criterian, scheduler, batch_size, embeddings_size,
                   seq_length, save_madel_path, epochs, device):
    """Train the model, evaluate it after every epoch and checkpoint the best one.

    The samples returned by ``Dataloader.load_data`` are split 80/20 (in the
    order they are returned) into a training part and a held-out test part.
    After each epoch the accuracy on both parts is printed, and whenever the
    test accuracy beats the best value seen so far a checkpoint is written.

    Args:
        model: Network handed to ``train`` / ``evaluate``; its ``state_dict()``
            is stored in the checkpoint.
        optimizer: Optimizer handed to ``train``; its ``state_dict()`` is
            stored in the checkpoint.
        criterian: Loss object forwarded to ``train``.
        scheduler: Scheduler object forwarded to ``train``.
        batch_size: Batch size given to ``Dataloader`` and used to derive the
            number of steps per epoch (``len(data) // batch_size``).
        embeddings_size: Forwarded to ``Dataloader``.
        seq_length: Forwarded to ``Dataloader``.
        save_madel_path: Destination passed to ``torch.save`` for the best
            checkpoint.
        epochs: Number of epochs to run.
        device: Forwarded to ``Dataloader``.

    Returns:
        None. Progress is printed and the best checkpoint is written to
        ``save_madel_path``.

    Note:
        ``word2vec_path`` and ``train_file`` are not parameters; they are
        resolved from the enclosing (module) scope.
    """
    import os

    # Build the data helper and load the whole corpus.
    data_loader = Dataloader(word2vec_path, batch_size, embeddings_size, seq_length, device)
    texts, labels = data_loader.load_data(train_file, shuffle=True, mode='train')
    print('Data load completed...')

    # Split into training and test parts; compute the boundary once so the
    # text and label slices can never drift apart.
    split = int(len(texts) * 0.8)
    train_texts, test_texts = texts[:split], texts[split:]
    train_labels, test_labels = labels[:split], labels[split:]

    # Number of full batches per pass (a trailing partial batch is not counted).
    train_steps = len(train_texts) // batch_size
    test_steps = len(test_texts) // batch_size

    # torch.save does not create missing parent directories; create them up
    # front so a finished epoch is not lost to a failed save. Skipped when the
    # destination is not a filesystem path (e.g. a file-like object).
    if isinstance(save_madel_path, (str, os.PathLike)):
        checkpoint_dir = os.path.dirname(os.fspath(save_madel_path))
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)

    best_test_acc = 0.0  # best test accuracy observed so far
    for e in range(1, epochs + 1):
        print('Epoch {}/{}'.format(e, epochs))

        # Training pass.
        train_data_iterator = data_loader.data_iterator(train_texts, train_labels)
        train(model, train_data_iterator, train_steps, criterian, optimizer, scheduler)

        # Fresh iterators for evaluation (the training one was handed to train()).
        train_data_iterator = data_loader.data_iterator(train_texts, train_labels)
        test_data_iterator = data_loader.data_iterator(test_texts, test_labels)

        # Evaluation on both parts.
        train_accuracy = evaluate(model, train_data_iterator, train_steps)
        test_accuracy = evaluate(model, test_data_iterator, test_steps)
        print('Training accuracy: ', train_accuracy)
        print('Testing accuracy: ', test_accuracy)

        if test_accuracy > best_test_acc:
            # Persist the parameters of the best model so far.
            print('Found a new best accuracy...')
            best_test_acc = test_accuracy
            checkpoint = {
                'model_state': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': e,
                'accuracy': best_test_acc
            }
            torch.save(checkpoint, save_madel_path)
def get_the_final_result():
    """Load the saved TextRCNN checkpoint and print its accuracy on the test split.

    Rebuilds the model with the hyper-parameters hard-coded below, restores
    the weights from ``./saved_model/text_rcnn.pth``, reloads the data file,
    takes the last 20% of the returned samples as the test part and prints the
    accuracy (as a percentage) reported by ``evaluate``.

    Returns:
        None. The result is printed.
    """
    # Hyper-parameters.
    batch_size = 512
    seq_length = 20
    embeddings_size = 300
    hidden_size = 256
    num_layers = 2
    num_classes = 9
    learning_rate = 0.003  # not used in this function
    dropout = 0.3

    # Data file paths.
    word2vec_path = './data/word2vec.bin'
    train_file = './data/train.json'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Build the model.
    model = TextRCNN(embeddings_size, num_classes, hidden_size, num_layers, True, dropout)
    model.to(device)

    # Restore the trained parameters. map_location remaps the stored tensors
    # onto the device selected above, so a checkpoint written on a GPU machine
    # still loads when only a CPU is available.
    checkpoints = torch.load('./saved_model/text_rcnn.pth', map_location=device)
    model.load_state_dict(checkpoints['model_state'])

    # evaluate() is defined elsewhere; switch to eval mode explicitly so the
    # dropout configured above is disabled for the final measurement.
    model.eval()

    # Load the data.
    data_loader = Dataloader(word2vec_path, batch_size, embeddings_size, seq_length, device)
    texts, labels = data_loader.load_data(train_file, shuffle=True, mode='train')
    print('Data load completed...')

    # Take the last 20% as the test part.
    # NOTE(review): the data is reloaded with shuffle=True; unless that shuffle
    # is deterministic, this slice is not the same held-out set used during
    # training and may contain training samples - confirm in Dataloader.
    split = int(len(texts) * 0.8)
    test_texts = texts[split:]
    test_labels = labels[split:]

    steps = len(test_texts) // batch_size
    loader = data_loader.data_iterator(test_texts, test_labels)

    # Accuracy on the test part.
    accuracy = evaluate(model, loader, steps)
    print('The final result(Accuracy in Test) is %.2f' % (accuracy * 100))