def get_the_final_result():
    """Load the saved TextRCNN checkpoint and print its held-out accuracy.

    The function rebuilds the model with the hyper-parameters below, restores
    the weights stored under the ``'model_state'`` key of
    ``./saved_model/text_rcnn.pth``, loads ``./data/train.json`` and evaluates
    on the last 20% of the loaded samples. The accuracy is printed as a
    percentage; nothing is returned.

    NOTE(review): the data is loaded with ``shuffle=True``. Whether the last
    20% here is the same split that was held out during training depends on
    how ``Dataloader.load_data`` shuffles (e.g. on the random seed) -- confirm
    against the training code to rule out train/test leakage.
    """
    # Hyper-parameters; presumably these must match the values used when the
    # checkpoint was trained, otherwise load_state_dict will reject it.
    batch_size = 512
    seq_length = 20
    embeddings_size = 300
    hidden_size = 256
    num_layers = 2
    num_classes = 9
    dropout = 0.3

    # Data file paths.
    word2vec_path = './data/word2vec.bin'
    train_file = './data/train.json'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Build the model (the positional True is presumably the bidirectional
    # flag -- TODO confirm against TextRCNN's signature).
    model = TextRCNN(embeddings_size, num_classes, hidden_size, num_layers,
                     True, dropout)
    model.to(device)

    # Restore the trained parameters. map_location remaps tensors saved on a
    # CUDA device so the checkpoint also loads on a CPU-only machine.
    checkpoints = torch.load('./saved_model/text_rcnn.pth',
                             map_location=device)
    model.load_state_dict(checkpoints['model_state'])

    # Switch to inference mode so dropout is disabled while measuring
    # accuracy; harmless if evaluate() already does this itself.
    model.eval()

    # Load the data set.
    data_loader = Dataloader(word2vec_path, batch_size, embeddings_size,
                             seq_length, device)
    texts, labels = data_loader.load_data(train_file, shuffle=True,
                                          mode='train')
    print('Data load completed...')

    # Hold out the last 20% of the samples as the test split.
    split_index = int(len(texts) * 0.8)
    test_texts = texts[split_index:]
    test_labels = labels[split_index:]

    # Number of full batches; a trailing partial batch is not counted.
    steps = len(test_texts) // batch_size
    loader = data_loader.data_iterator(test_texts, test_labels)

    # Accuracy on the test split.
    accuracy = evaluate(model, loader, steps)
    print('The final result(Accuracy in Test) is %.2f' % (accuracy * 100))
if __name__ == "__main__":
    # Script entry point: build a TextRCNN classifier together with its loss,
    # optimizer and learning-rate schedule, then hand everything to
    # train_and_test.

    # Fix the random seeds first so that everything created afterwards
    # (including the model's initial weights) is reproducible.
    random.seed(2020)
    torch.manual_seed(2020)

    # Training configuration.
    epochs = 50
    batch_size = 512
    learning_rate = 0.003

    # Input / model dimensions.
    seq_length = 20
    embeddings_size = 300
    hidden_size = 256
    num_layers = 2
    num_classes = 9
    dropout = 0.3

    # Prefer the GPU when one is available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Text classification model.
    model = TextRCNN(
        embeddings_size,
        num_classes,
        hidden_size,
        num_layers,
        True,
        dropout,
    )
    model.to(device)

    # Loss function, optimizer and LR schedule. The schedule scales the base
    # learning rate by 1 / (1 + 0.05 * epoch).
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    scheduler = LambdaLR(
        optimizer,
        lr_lambda=lambda epoch_idx: 1 / (1 + 0.05 * epoch_idx),
    )

    print('-' * 100)

    # The trained weights are presumably written to this path by
    # train_and_test -- confirm against its implementation.
    checkpoint_path = './saved_model/text_rcnn.pth'
    train_and_test(
        model,
        optimizer,
        loss_fn,
        scheduler,
        batch_size,
        embeddings_size,
        seq_length,
        checkpoint_path,
        epochs,
        device,
    )