# NOTE(review): mangled extraction — this line is a flattened chunk of a larger
# training script and is NOT valid Python as it stands. It is cut mid-statement
# at both ends: the opening of the train DataLoader call (before
# `shuffle=True, …`) and the trailing keyword arguments of the `optim.SGD(...)`
# call are outside this chunk, so it cannot be safely reformatted or restyled
# here. What it visibly does: builds the test DataLoader, selects cuda:0/cpu,
# constructs a 4-stage Regressor (wrapped in DataParallel over `ngpu` devices
# when opt.cuda), optionally loads weights from opt.net, uses nn.MSELoss, and
# sets up SGD with a separate parameter group for net.module.fc2 at
# lr = opt.lr * opt.lrMul.
shuffle=True, num_workers=int(opt.workers)) test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=opt.batchSize, shuffle=False, num_workers=int(opt.workers)) device = torch.device("cuda:0" if opt.cuda else "cpu") ngpu = int(opt.ngpu) net = Regressor(_num_stages=4, _use_avg_on_conv3=False).to(device) if opt.cuda: net = torch.nn.DataParallel(net, device_ids=range(ngpu)) if opt.net != '': net.load_state_dict(torch.load(opt.net)) print(net) criterion = nn.MSELoss() # setup optimizer fc2_params = list(map(id, net.module.fc2.parameters())) base_params = filter(lambda p: id(p) not in fc2_params, net.parameters()) optimizer = optim.SGD([{ 'params': base_params }, { 'params': net.module.fc2.parameters(), 'lr': opt.lr * opt.lrMul }],
# NOTE(review): mangled extraction — a second flattened setup chunk (a variant
# of the one above using UPPERCASE config constants: BATCHSIZE, WORKERS, CUDA,
# NGPU, NET, LR, LRMUL, OPTIMIZER, NITER). It is cut at the end: the body of
# `for epoch in range(NITER):` lies outside this chunk, so the line cannot be
# safely reformatted or restyled here. What it visibly does: asserts the
# datasets exist, builds train/test DataLoaders, constructs a 4-stage
# Regressor (DataParallel when CUDA), optionally loads network and optimizer
# state from NET/OPTIMIZER, uses nn.MSELoss, and sets up Nesterov SGD
# (momentum=0.9, weight_decay=5e-4) with network.module.linear2 in its own
# parameter group at lr = LR * LRMUL. Commented-out `mi_est_model` /
# `mi_optimizer` lines suggest an abandoned mutual-information estimator —
# candidates for deletion once confirmed dead.
assert train_dataset assert test_dataset train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=WORKERS) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=WORKERS) device = torch.device("cuda:0" if CUDA else "cpu") ngpu = int(NGPU) network = Regressor(stages=4, use_avg_on_conv3=False).to(device) #mi_est_model = NWJ(mi_est_z_dim, mi_est_t_dim, mi_est_hidden_size).to(device) if CUDA: network = torch.nn.DataParallel(network, device_ids=range(ngpu)) if NET != '': network.load_state_dict(torch.load(NET)) print(network) criterion = nn.MSELoss() linear2_params = list(map(id, network.module.linear2.parameters())) base_params = filter(lambda p: id(p) not in linear2_params, network.parameters()) #mi_optimizer = optim.Adam(mi_est_model.parameters(), mi_est_learning_rate) optimizer = optim.SGD([{"params":base_params}, {"params":network.module.linear2.parameters(), "lr":LR*LRMUL}], lr=LR, momentum=0.9, weight_decay=5e-4, nesterov=True) #optimizer = optim.Adam() if OPTIMIZER != '': optimizer.load_state_dict(torch.load(OPTIMIZER)) for epoch in range(NITER):
def main():
    """Train and evaluate a CIFAR-10 classifier head on top of a Regressor backbone.

    Relies on module-level state defined elsewhere in the project: ``args``
    (CLI options), ``best_acc``, ``device``, ``use_cuda``, ``state`` (carries
    the current learning rate for logging), and the helpers ``mkdir_p``,
    ``Logger``, ``Regressor``, ``Classifier``, ``train``, ``test``,
    ``adjust_learning_rate`` and ``save_checkpoint``.

    Side effects: downloads CIFAR-10 under ``args.dataroot`` if absent, writes
    ``log.txt`` and checkpoints under ``args.checkpoint``, and updates the
    global ``best_acc``.
    """
    global best_acc
    global device

    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data: standard CIFAR-10 augmentation for train, plain normalize for test.
    print('==> Preparing dataset cifar10')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    dataloader = datasets.CIFAR10

    trainset = dataloader(root=args.dataroot, train=True, download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    # FIX: was download=False — an evaluate-only run on a fresh machine crashed
    # before the dataset existed. download=True is a no-op when data is present.
    testset = dataloader(root=args.dataroot, train=False, download=True,
                         transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model: 3-stage Regressor feature extractor (optionally loaded from
    # args.net) feeding an 'Alexnet_conv5' Classifier head; only the head's
    # parameters go to the optimizer below.
    net = Regressor(_num_stages=3, _use_avg_on_conv3=False).to(device)
    net = torch.nn.DataParallel(net, device_ids=[0])
    if args.net != '':
        net.load_state_dict(torch.load(args.net))
    model = Classifier(_nChannels=192, _num_classes=10,
                       _cls_type='Alexnet_conv5').to(device)
    model = torch.nn.DataParallel(model, device_ids=[0])
    cudnn.benchmark = True
    print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay, nesterov=True)

    # Resume from a checkpoint file if requested.
    title = 'cifar-10-'
    if args.resume:
        print('==> Resuming from checkpoint..')
        # FIX: message said "directory" but the check is for the checkpoint *file*.
        assert os.path.isfile(args.resume), 'Error: no checkpoint file found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Epoch', 'LR', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, net, model, criterion,
                                   start_epoch, use_cuda, device)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and validate, checkpointing the best test accuracy seen so far.
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # NOTE(review): state['lr'] is assumed to be kept in sync by
        # adjust_learning_rate — confirm against its definition.
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, net, model, criterion,
                                      optimizer, epoch, use_cuda, device)
        test_loss, test_acc = test(testloader, net, model, criterion, epoch,
                                   use_cuda, device)
        logger.append(epoch, [state['lr'], train_acc, test_acc])

        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    print('Best acc:')
    print(best_acc)