import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# `load_net` (network factory) and `load_dataset` (pickled dataset loader) are
# project-local helpers assumed to be defined/imported elsewhere in this project.


def load_trained_net(model_path):
    """Load a pre-trained ResNet-152 checkpoint and put the network in eval mode."""
    print("Begin to load pre-trained net ... ", end="")
    net = load_net("resnet152")
    checkpoint = torch.load(model_path)
    net.load_state_dict(checkpoint['state_dict'])
    net.eval()
    print("Finished.")
    return net
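
# Hedged usage sketch (not part of the original project): how a network restored by
# `load_trained_net` might be used for a single forward pass. The checkpoint path and the
# 224x224 RGB input shape are illustrative assumptions, not values taken from this repo.
def _example_inference(model_path="models/example-checkpoint.pth"):
    net = load_trained_net(model_path)
    dummy_batch = torch.randn(1, 3, 224, 224)  # one fake RGB image in ResNet input shape
    with torch.no_grad():
        logits = net(dummy_batch)
    return logits.argmax(dim=1)  # index of the highest-scoring class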
def train(net_name, train_full, model_out_dir, epoch_size, batch_size, train_continue):
    print(f"Train_full: {train_full}")
    print("Loading net ... ", end="")
    net = load_net(net_name)
    net.cuda()
    print("Finished")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    # The scheduler monitors validation accuracy ('max' mode); it is only stepped
    # when a validation split exists (i.e. when train_full is False).
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True, patience=3)

    init_epoch = 0
    if train_continue:
        # Resume from a saved checkpoint: restore weights and optimizer state.
        # The saved 'epoch' is the number of epochs already completed, so training
        # resumes at the following epoch.
        checkpoint = torch.load(train_continue)
        init_epoch = checkpoint["epoch"]
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    print("CUDA is available?", torch.cuda.is_available())

    print("Start to load train data ...", end=" ")
    if train_full:
        # Train on the full dataset (no validation split).
        train_data = load_dataset.load_from_pickle()  # load the dataset
        trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    else:
        # Randomly split the dataset into training and validation sets at a 9:1 ratio.
        train_data = load_dataset.load_from_pickle()  # load the dataset
        trainset, testset = torch.utils.data.random_split(
            train_data,
            [len(train_data) - int(len(train_data) / 10), int(len(train_data) / 10)])
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)
    print("Finished")

    # Start training.
    print("Start to train ...")
    for epoch in range(init_epoch, init_epoch + epoch_size):
        running_loss = 0.0
        running_loss_keeper = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs = inputs.cuda()
            labels = labels.cuda()

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()  # accumulate training loss
            if i % 20 == 19:
                # Report the average training loss over the last 20 mini-batches.
                print('[%d, %5d] train loss: %.3f' % (epoch + 1, i + 1, running_loss / 20))
                running_loss_keeper = running_loss
                running_loss = 0.0

        if not train_full:
            # Measure generalization accuracy on the held-out validation set.
            correct = 0
            total = 0
            with torch.no_grad():
                for data in testloader:
                    images, labels = data
                    images = images.cuda()
                    labels = labels.cuda()
                    outputs = net(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            test_acc = 100 * correct / total
            scheduler.step(test_acc)
            print('Accuracy of the network on the validation set: %d %%' % test_acc)

        # Save a checkpoint at the end of every epoch; the filename records the epoch
        # and either the validation accuracy or the last kept training loss.
        if not os.path.exists(model_out_dir):
            os.mkdir(model_out_dir)
        file_path = f"{model_out_dir}/model-{epoch + 1}-{test_acc if not train_full else running_loss_keeper}.pth"
        state = {
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(state, file_path)

    print('Finished Training')
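
# Hedged usage sketch (not from the original repo): a minimal entry point showing how
# `train` might be invoked. A CUDA device is required (the net and batches are moved to
# the GPU). The architecture name, output directory, and hyperparameter values below are
# illustrative assumptions only.
if __name__ == "__main__":
    train(net_name="resnet152",    # assumed architecture; any name accepted by `load_net`
          train_full=False,        # keep a 10% validation split so accuracy drives the LR scheduler
          model_out_dir="models",  # hypothetical checkpoint directory
          epoch_size=20,           # number of epochs to run starting from `init_epoch`
          batch_size=32,
          train_continue=None)     # or a path to a saved .pth checkpoint to resume from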