# Test
test_distillation = 0.0
test_teacher = 0.0
test_student = 0.0
test_sobolev = 0.0
for X, y in testloader:
    X, y = V(X.cuda()), V(y.cuda())
    s_preds = student(X)
    t_preds = teacher(X)
    s_loss = distillation_loss(s_preds, t_preds.detach())
    t_loss = label_loss(t_preds, y)
    s_opt.zero_grad()
    t_opt.zero_grad()
    s_loss.backward()
    t_loss.backward()
    if USE_SOBOLEV:
        sobolev_loss = sobolev(student.parameters(), teacher.parameters())
    test_student += label_loss(s_preds, y).data[0]
    test_distillation += s_loss.data[0]
    test_teacher += t_loss.data[0]
    if USE_SOBOLEV:
        test_sobolev += sobolev_loss.data[0]

print('*' * 20, 'Test Stats', '*' * 20)
print('distillation_loss:', test_distillation / len(testloader))
print('student_loss: ', test_student / len(testloader))
print('teacher_loss:', test_teacher / len(testloader))
print('sobolev_loss: ', test_sobolev / len(testloader))
print('\n')

th.save(student.state_dict(), './student.pth')
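# The distillation_loss used above is not defined in this snippet. Below is a
# minimal sketch of a common choice (Hinton-style soft-target KL divergence
# with temperature T); it is an assumption, not the author's implementation.
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, T=4.0):
    """KL divergence between temperature-softened student and teacher outputs."""
    p_student = F.log_softmax(student_logits / T, dim=1)
    p_teacher = F.softmax(teacher_logits / T, dim=1)
    # Scale by T^2 so gradient magnitudes stay comparable across temperatures.
    return F.kl_div(p_student, p_teacher, reduction='batchmean') * (T * T)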
def main():
    parser = argparse.ArgumentParser()
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument("--train", action="store_true",
                            help="To train the network.")
    mode_group.add_argument("--test", action="store_true",
                            help="To test the network.")
    parser.add_argument("--epochs", default=10, type=int,
                        help="Desired number of epochs.")
    parser.add_argument("--dropout", action="store_true",
                        help="Whether to use dropout or not.")
    parser.add_argument("--uncertainty", action="store_true",
                        help="Use uncertainty or not.")
    parser.add_argument("--dataset", action="store_true",
                        help="The dataset to use.")
    parser.add_argument("--outsample", action="store_true",
                        help="Use out of sample test image")

    uncertainty_type_group = parser.add_mutually_exclusive_group()
    uncertainty_type_group.add_argument(
        "--mse", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Expected Mean Square Error.")
    uncertainty_type_group.add_argument(
        "--digamma", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Expected Cross Entropy.")
    uncertainty_type_group.add_argument(
        "--log", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Negative Log of the Expected Likelihood.")

    dataset_type_group = parser.add_mutually_exclusive_group()
    dataset_type_group.add_argument(
        "--mnist", action="store_true",
        help="Set this argument when using MNIST dataset")
    dataset_type_group.add_argument(
        "--emnist", action="store_true",
        help="Set this argument when using EMNIST dataset")
    dataset_type_group.add_argument(
        "--CIFAR", action="store_true",
        help="Set this argument when using CIFAR dataset")
    dataset_type_group.add_argument(
        "--fmnist", action="store_true",
        help="Set this argument when using FMNIST dataset")

    args = parser.parse_args()

    if args.dataset:
        if args.mnist:
            from mnist import dataloaders, label_list
        elif args.CIFAR:
            from CIFAR import dataloaders, label_list
        elif args.fmnist:
            from fashionMNIST import dataloaders, label_list

    if args.train:
        num_epochs = args.epochs
        use_uncertainty = args.uncertainty
        num_classes = 10

        model = LeNet(dropout=args.dropout)

        if use_uncertainty:
            if args.digamma:
                criterion = edl_digamma_loss
            elif args.log:
                criterion = edl_log_loss
            elif args.mse:
                criterion = edl_mse_loss
            else:
                parser.error("--uncertainty requires --mse, --log or --digamma.")
        else:
            criterion = nn.CrossEntropyLoss()

        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.005)
        exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

        device = get_device()
        model = model.to(device)

        model, metrics = train_model(model, dataloaders, num_classes, criterion,
                                     optimizer, scheduler=exp_lr_scheduler,
                                     num_epochs=num_epochs, device=device,
                                     uncertainty=use_uncertainty)

        state = {
            "epoch": num_epochs,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }

        if use_uncertainty:
            if args.digamma:
                torch.save(state, "./results/model_uncertainty_digamma.pt")
                print("Saved: ./results/model_uncertainty_digamma.pt")
            if args.log:
                torch.save(state, "./results/model_uncertainty_log.pt")
                print("Saved: ./results/model_uncertainty_log.pt")
            if args.mse:
                torch.save(state, "./results/model_uncertainty_mse.pt")
                print("Saved: ./results/model_uncertainty_mse.pt")
        else:
            torch.save(state, "./results/model.pt")
            print("Saved: ./results/model.pt")

    elif args.test:
        use_uncertainty = args.uncertainty
        device = get_device()
        model = LeNet()
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())

        if use_uncertainty:
            if args.digamma:
                checkpoint = torch.load("./results/model_uncertainty_digamma.pt")
            if args.log:
                checkpoint = torch.load("./results/model_uncertainty_log.pt")
            if args.mse:
                checkpoint = torch.load("./results/model_uncertainty_mse.pt")
        else:
            checkpoint = torch.load("./results/model.pt")

        filename = "./results/rotate.jpg"

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        model.eval()

        if args.outsample:
            img = Image.open("./data/arka.jpg").convert('L').resize((28, 28))
            img = TF.to_tensor(img)
            img.unsqueeze_(0)
        else:
            a = iter(dataloaders['test'])
            img, label = next(a)

        rotating_image_classification(model, img, filename, label_list,
                                      uncertainty=use_uncertainty)

        img = transforms.ToPILImage()(img[0][0])
        test_single_image(model, img, label_list, uncertainty=use_uncertainty)
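# get_device() is referenced above but not defined in this snippet. A minimal
# sketch under the assumption that it simply prefers CUDA when available:
import torch

def get_device():
    return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")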
for epoch in range(200):
    epoch_teacher = 0.0
    if epoch == 100 or epoch == 150:
        for group in t_opt.param_groups:
            group['lr'] *= 0.1
    for X, y in trainloader:
        X, y = V(X.cuda()), V(y.cuda())
        t_preds = teacher(X)
        t_loss = label_loss(t_preds, y)
        t_opt.zero_grad()
        t_loss.backward()
        t_opt.step()
        epoch_teacher += t_loss.data[0]
    print('*' * 20, 'Epoch ', epoch, '*' * 20)
    print('teacher_loss:', epoch_teacher / len(trainloader))
    print('\n')

# Test
test_teacher = 0.0
for X, y in testloader:
    X, y = V(X.cuda()), V(y.cuda())
    t_preds = teacher(X)
    t_loss = label_loss(t_preds, y)
    t_opt.zero_grad()
    test_teacher += t_loss.data[0]

print('*' * 20, 'Test Stats', '*' * 20)
print('teacher_loss:', test_teacher / len(testloader))
print('\n')

th.save(teacher.state_dict(), './teacher.pth')
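# The manual learning-rate decay at epochs 100 and 150 above could equivalently
# be expressed with a built-in scheduler. This is a sketch of an alternative,
# not the original author's code:
from torch.optim.lr_scheduler import MultiStepLR

scheduler = MultiStepLR(t_opt, milestones=[100, 150], gamma=0.1)
# ...then call scheduler.step() once per epoch instead of editing param_groups.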
        inputs, labels = inputs.to(device, dtype=torch.float), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.6f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device, dtype=torch.float), labels.to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test images: %.2f' % (100 * correct / total))
    torch.save(net.state_dict(),
               './expr/%s_%s_%.2f.pth' % (str(epoch), args.net, 100 * correct / total))

print("Done Training!")
def training(model_name, trainloader, validloader, input_channel=3, epochs=1,
             resume=True, self_define=True, only_print=False):
    # load self-defined or official net
    assert model_name in ["LeNet", "VGG16", "ResNet", "DenseNet"]
    if self_define:
        if model_name == "LeNet":
            net = LeNet(input_channel)
        elif model_name == "VGG16":
            net = VGG16(input_channel)
        elif model_name == "ResNet":
            net = ResNet(input_channel)
        elif model_name == "DenseNet":
            net = DenseNet(input_channel)
    else:
        if model_name == "LeNet":
            net = LeNet(input_channel)  # no official LeNet; fall back to the self-defined one
        elif model_name == "VGG16":
            net = models.vgg16_bn(pretrained=False, num_classes=10)
        elif model_name == "ResNet":
            net = models.resnet50(pretrained=False, num_classes=10)
        elif model_name == "DenseNet":
            net = models.DenseNet(num_classes=10)

    # total number of trainable parameters
    print("Number of trainable parameters in %s : %d" %
          (model_name, sum(p.numel() for p in net.parameters() if p.requires_grad)))

    # print model structure only
    if only_print:
        print(net)
        return

    # resume training if saved parameters exist
    param_path = "./model/%s_%s_parameter.pt" % (model_name, "define" if self_define else "official")
    if resume and os.path.exists(param_path):
        net.load_state_dict(torch.load(param_path))
        net.train()
        print("Resume training " + model_name)
    else:
        print("Train %s from scratch" % model_name)

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # train on GPU if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('train on %s' % device)
    net.to(device)

    running_loss = 0.0
    train_losses = []
    valid_losses = []
    mini_batches = 125 * 5
    for epoch in range(epochs):
        for i, data in enumerate(trainloader, 0):
            # get one batch
            inputs, labels = data[0].to(device), data[1].to(device)

            # switch model to training mode, clear gradient accumulators
            net.train()
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics and validate every <mini_batches> mini-batches
            running_loss += loss.item()
            if i % mini_batches == mini_batches - 1:
                valid_loss = valid(net, validloader, criterion, device)
                print('[%d, %5d] train loss: %.3f, validset loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / mini_batches, valid_loss))
                train_losses.append(running_loss / mini_batches)
                valid_losses.append(valid_loss)
                running_loss = 0.0

        # save parameters after every epoch
        torch.save(net.state_dict(), param_path)
        # # save checkpoint
        # torch.save({
        #     'epoch': epoch,
        #     'model_state_dict': net.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'loss': loss
        # }, "./checkpoints/epoch_" + str(epoch) + ".tar")

    print('Finished Training, %d images in all' %
          (len(train_losses) * batch_size * mini_batches / epochs))

    # draw loss curve
    assert len(train_losses) == len(valid_losses)
    loss_x = range(0, len(train_losses))
    plt.plot(loss_x, train_losses, label="train loss")
    plt.plot(loss_x, valid_losses, label="valid loss")
    plt.title("Loss for every %d mini-batch" % mini_batches)
    plt.xlabel("%d mini-batches" % mini_batches)
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(model_name + "_loss.png")
    plt.show()
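# The valid() helper called above is not defined in this snippet. A minimal
# sketch, assuming it returns the average validation loss over the loader:
import torch

def valid(net, validloader, criterion, device):
    net.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in validloader:
            inputs, labels = inputs.to(device), labels.to(device)
            total_loss += criterion(net(inputs), labels).item()
    net.train()
    return total_loss / len(validloader)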
def main():
    parser = argparse.ArgumentParser()
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument("--train", action="store_true",
                            help="To train the network.")
    mode_group.add_argument("--test", action="store_true",
                            help="To test the network.")
    mode_group.add_argument("--examples", action="store_true",
                            help="To show example MNIST data.")
    parser.add_argument("--epochs", default=10, type=int,
                        help="Desired number of epochs.")
    parser.add_argument("--dropout", action="store_true",
                        help="Whether to use dropout or not.")
    parser.add_argument("--uncertainty", action="store_true",
                        help="Use uncertainty or not.")

    uncertainty_type_group = parser.add_mutually_exclusive_group()
    uncertainty_type_group.add_argument(
        "--mse", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Expected Mean Square Error.")
    uncertainty_type_group.add_argument(
        "--digamma", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Expected Cross Entropy.")
    uncertainty_type_group.add_argument(
        "--log", action="store_true",
        help="Set this argument when using uncertainty. Sets loss function to Negative Log of the Expected Likelihood.")

    args = parser.parse_args()

    if args.examples:
        examples = enumerate(dataloaders["val"])
        batch_idx, (example_data, example_targets) = next(examples)
        fig = plt.figure()
        for i in range(6):
            plt.subplot(2, 3, i + 1)
            plt.tight_layout()
            plt.imshow(example_data[i][0], cmap="gray", interpolation="none")
            plt.title("Ground Truth: {}".format(example_targets[i]))
            plt.xticks([])
            plt.yticks([])
        plt.savefig("./images/examples.jpg")

    elif args.train:
        num_epochs = args.epochs
        use_uncertainty = args.uncertainty
        num_classes = 10

        model = LeNet(dropout=args.dropout)

        if use_uncertainty:
            if args.digamma:
                criterion = edl_digamma_loss
            elif args.log:
                criterion = edl_log_loss
            elif args.mse:
                criterion = edl_mse_loss
            else:
                parser.error("--uncertainty requires --mse, --log or --digamma.")
        else:
            criterion = nn.CrossEntropyLoss()

        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.005)
        exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

        device = get_device()
        model = model.to(device)

        model, metrics = train_model(model, dataloaders, num_classes, criterion,
                                     optimizer, scheduler=exp_lr_scheduler,
                                     num_epochs=num_epochs, device=device,
                                     uncertainty=use_uncertainty)

        state = {
            "epoch": num_epochs,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }

        if use_uncertainty:
            if args.digamma:
                torch.save(state, "./results/model_uncertainty_digamma.pt")
                print("Saved: ./results/model_uncertainty_digamma.pt")
            if args.log:
                torch.save(state, "./results/model_uncertainty_log.pt")
                print("Saved: ./results/model_uncertainty_log.pt")
            if args.mse:
                torch.save(state, "./results/model_uncertainty_mse.pt")
                print("Saved: ./results/model_uncertainty_mse.pt")
        else:
            torch.save(state, "./results/model.pt")
            print("Saved: ./results/model.pt")

    elif args.test:
        use_uncertainty = args.uncertainty
        device = get_device()
        model = LeNet()
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())

        if use_uncertainty:
            if args.digamma:
                checkpoint = torch.load("./results/model_uncertainty_digamma.pt")
                filename = "./results/rotate_uncertainty_digamma.jpg"
            if args.log:
                checkpoint = torch.load("./results/model_uncertainty_log.pt")
                filename = "./results/rotate_uncertainty_log.jpg"
            if args.mse:
                checkpoint = torch.load("./results/model_uncertainty_mse.pt")
                filename = "./results/rotate_uncertainty_mse.jpg"
        else:
            checkpoint = torch.load("./results/model.pt")
            filename = "./results/rotate.jpg"

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        model.eval()

        rotating_image_classification(model, digit_one, filename,
                                      uncertainty=use_uncertainty)

        img = Image.open("./data/one.jpg").convert('L')
        test_single_image(model, img, uncertainty=use_uncertainty)
        loss_mean += loss.item()
        train_curve.append(loss.item())
        if (i + 1) % log_interval == 0:
            loss_mean = loss_mean / log_interval
            print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, i + 1, len(train_loader), loss_mean, correct / total))
            loss_mean = 0.

    scheduler.step()  # update learning rate

    if (epoch + 1) % checkpoint_interval == 0:
        checkpoint = {
            "model_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
            "epoch": epoch,
        }
        path_checkpoint = "./checkpoint_{}_epoch.pkl".format(epoch)
        torch.save(checkpoint, path_checkpoint)

    # if epoch > 5:
    #     print("Training unexpectedly interrupted...")
    #     break

    # validate the model
    if (epoch + 1) % val_interval == 0:
        correct_val = 0.
        correct += (predicted == labels).squeeze().sum().numpy()

        # print training statistics
        loss_mean += loss.item()
        train_curve.append(loss.item())
        if (i + 1) % log_interval == 0:
            loss_mean = loss_mean / log_interval
            print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, i + 1, len(train_loader), loss_mean, correct / total))
            loss_mean = 0.

    scheduler.step()  # update learning rate

    if (epoch + 1) % checkpoint_interval == 0:
        checkpoint = {
            "model_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "epoch": epoch,
        }
        path_checkpoint = "./checkpoint_{}_epoch.pkl".format(epoch)
        torch.save(checkpoint, path_checkpoint)

    if epoch > 5:
        print("Training unexpectedly interrupted...")
        break

    # validate the model
    if (epoch + 1) % val_interval == 0:
        correct_val = 0.
        total_val = 0.
        loss_val = 0.
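        # The snippet ends just after the validation counters are initialised.
        # A minimal sketch of how such a validation block typically continues;
        # the valid_loader and criterion names are assumptions, not the
        # original code.
        net.eval()
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = net(inputs)
                loss_val += criterion(outputs, labels).item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).squeeze().sum().item()
        print("Valid:\t Epoch[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
            epoch, MAX_EPOCH, loss_val / len(valid_loader), correct_val / total_val))
        net.train()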