def test_lenet(self):
    """LeNet forward pass: batch dimension is preserved and the output
    width equals the requested number of classes."""
    from models import LeNet
    n_outputs = 10
    model = LeNet(num_classes=n_outputs)
    model.eval()  # deterministic forward (no dropout/BN updates)
    x = torch.randn(20, 3, 32, 32)
    outputs = model(x)
    # BUGFIX(idiom): assertEqual reports actual vs expected on failure,
    # unlike assertTrue on an opaque boolean expression.
    self.assertEqual(outputs.shape[0], x.shape[0])
    self.assertEqual(outputs.shape[1], n_outputs)
def test_get_mods(self):
    """get_mods must not produce more modules than the original
    features + classifier stacks contain."""
    from models import LeNet
    from altmin import get_mods
    model = LeNet()
    model.eval()
    x = torch.randn(20, 3, 32, 32)
    outputs = model(x)  # sanity forward pass before conversion
    model_mods = get_mods(model)
    # BUGFIX(idiom): assertGreaterEqual shows both operands on failure,
    # unlike assertTrue on a comparison expression.
    self.assertGreaterEqual(
        len(model.features) + len(model.classifier), len(model_mods))
def test_get_codes(self):
    """The converted model must reproduce the original model's output,
    both through get_codes and through a direct forward call."""
    from models import LeNet
    from altmin import get_mods, get_codes
    net = LeNet()
    net.eval()
    batch = torch.randn(20, 3, 32, 32)
    reference = net(batch)
    net_mods = get_mods(net)
    out_codes, codes = get_codes(net_mods, batch)
    out_direct = net_mods(batch)
    # Mean absolute deviation should vanish in both comparisons.
    self.assertAlmostEqual((reference - out_codes).abs().mean().item(), 0)
    self.assertAlmostEqual((out_codes - out_direct).abs().mean().item(), 0)
class Solver(object):
    """Training/evaluation harness for CIFAR-10 image classification.

    Wires together data loaders, a model, an Adam optimizer with a
    MultiStepLR schedule, and a cross-entropy loss; `run()` drives the
    full train/test loop.
    """

    def __init__(self, config):
        # Hyper-parameters come from an argparse-style `config` object.
        self.model = None
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.device = None
        self.cuda = config.cuda
        self.train_loader = None
        self.test_loader = None

    def load_data(self):
        """Build CIFAR-10 loaders (random horizontal flip on train only)."""
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(), transforms.ToTensor()])
        test_transform = transforms.Compose([transforms.ToTensor()])
        train_set = torchvision.datasets.CIFAR10(
            root='./data', train=True, download=True,
            transform=train_transform)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_set, batch_size=self.train_batch_size,
            shuffle=True)
        test_set = torchvision.datasets.CIFAR10(
            root='./data', train=False, download=True,
            transform=test_transform)
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_set, batch_size=self.test_batch_size,
            shuffle=False)

    def load_model(self):
        """Instantiate model, optimizer, LR scheduler and loss on device."""
        if self.cuda:
            self.device = torch.device('cuda')
            cudnn.benchmark = True  # autotune conv kernels for fixed shapes
        else:
            self.device = torch.device('cpu')

        self.model = LeNet().to(self.device)
        # Alternative architectures kept for quick experimentation:
        # self.model = AlexNet().to(self.device)
        # self.model = VGG11().to(self.device)
        # self.model = VGG13().to(self.device)
        # self.model = VGG16().to(self.device)
        # self.model = VGG19().to(self.device)
        # self.model = GoogLeNet().to(self.device)
        # self.model = resnet18().to(self.device)
        # self.model = resnet34().to(self.device)
        # self.model = resnet50().to(self.device)
        # self.model = resnet101().to(self.device)
        # self.model = resnet152().to(self.device)
        # self.model = DenseNet121().to(self.device)
        # self.model = DenseNet161().to(self.device)
        # self.model = DenseNet169().to(self.device)
        # self.model = DenseNet201().to(self.device)
        # self.model = WideResNet(depth=28, num_classes=10).to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[75, 150], gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def train(self):
        """One epoch over the training set.

        Returns:
            (total_loss, accuracy) where accuracy is in [0, 1].
        """
        print("train:")
        self.model.train()
        train_loss = 0
        train_correct = 0
        total = 0
        for batch_num, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            # NOTE(review): intermediate activations cached by the model,
            # apparently for CAM-style inspection — currently unused here.
            feature = self.model.feature
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            # torch.max over dim 1 -> (values, indices); [1] is predictions.
            prediction = torch.max(output, 1)
            total += target.size(0)
            # train_correct incremented by one if predicted right
            train_correct += np.sum(
                prediction[1].cpu().numpy() == target.cpu().numpy())
            progress_bar(
                batch_num, len(self.train_loader),
                'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                (train_loss / (batch_num + 1),
                 100. * train_correct / total, train_correct, total))
        return train_loss, train_correct / total

    def test(self):
        """One pass over the test set without gradients.

        Returns:
            (total_loss, accuracy) where accuracy is in [0, 1].
        """
        print("test:")
        self.model.eval()
        test_loss = 0
        test_correct = 0
        total = 0
        with torch.no_grad():
            for batch_num, (data, target) in enumerate(self.test_loader):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                prediction = torch.max(output, 1)
                total += target.size(0)
                test_correct += np.sum(
                    prediction[1].cpu().numpy() == target.cpu().numpy())
                progress_bar(
                    batch_num, len(self.test_loader),
                    'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                    (test_loss / (batch_num + 1),
                     100. * test_correct / total, test_correct, total))
        return test_loss, test_correct / total

    def save(self):
        """Persist the model to ./model.pth.

        NOTE(review): saves the whole pickled module, not a state_dict;
        consumers may rely on this format, so it is left unchanged, but
        torch.save(self.model.state_dict(), ...) is the recommended form.
        """
        model_out_path = "model.pth"
        torch.save(self.model, model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))

    def run(self):
        """Full training loop: load data/model, train, track best accuracy."""
        self.load_data()
        print('Success loading data.')
        self.load_model()
        print('Success loading model.')
        accuracy = 0
        for epoch in range(1, self.epochs + 1):
            # BUGFIX: show the configured epoch count instead of a
            # hard-coded "/200".
            print("\n===> epoch: %d/%d" % (epoch, self.epochs))
            train_result = self.train()
            print(train_result)
            # BUGFIX: step the scheduler AFTER this epoch's optimizer
            # updates (required ordering since PyTorch 1.1); the deprecated
            # explicit-epoch argument is dropped.
            self.scheduler.step()
            test_result = self.test()
            accuracy = max(accuracy, test_result[1])
            if epoch == self.epochs:
                print("===> BEST ACC. PERFORMANCE: %.3f%%" % (accuracy * 100))
                self.save()
# NOTE(review): this chunk is the TAIL of a federated-averaging training
# script — the enclosing `main()` and its round/client loops begin outside
# this view, so the indentation below is reconstructed and must be verified
# against the original file. Code tokens are unchanged.
            # Inner loop body: train a local copy of the global net on one
            # client; presumably `idx` indexes the selected client — confirm.
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device),
                                  global_grad_data=w_glob_grad)
            w_locals[idx] = copy.deepcopy(w)
            loss_locals.append(copy.deepcopy(loss))
        # update global weights
        w_glob = cal_ave_weight(w_locals)
        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)
        # print loss
        loss_avg = sum(loss_locals) / len(loss_locals)
        writer_loss.add_scalar("train_loss", loss_avg, iter)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
        # test after a round
        net_glob.eval()
        acc_test, loss_test = test_img(net_glob, dataset_test, args)
        writer_acc.add_scalar("test_acc", acc_test, iter)
        # loss_train.append(loss_avg)
    writer_loss.close()

    # testing: final evaluation of the aggregated global model
    net_glob.eval()
    acc_train, loss_train = test_img(net_glob, dataset_train, args)
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    print("Training accuracy: {:.2f}".format(acc_train))
    print("Testing accuracy: {:.2f}".format(acc_test))


if __name__ == '__main__':
    main()
def initiate_mnist(dataset, random_model=False):
    """Load (Fashion-)MNIST, build a clean and a PGD-adversarially-trained
    LeNet, and print the clean model's test accuracy.

    Args:
        dataset: "mnist" or "fashion".
        random_model: if True, keep randomly initialized weights instead of
            loading the checkpoints under ``checkpoints/<dataset>/``.

    Returns:
        (model, model_2, train_loader, test_loader)

    Raises:
        ValueError: if ``dataset`` is not a recognized name.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    if dataset == "mnist":
        dataset_cls = datasets.MNIST
    elif dataset == "fashion":
        dataset_cls = datasets.FashionMNIST
    else:
        # BUGFIX: unknown names previously fell through and crashed later
        # with NameError on train_loader; fail fast instead.
        raise ValueError("Unknown dataset: {!r}".format(dataset))

    # The two branches previously duplicated this construction verbatim.
    train_loader = torch.utils.data.DataLoader(
        dataset_cls(root='./data', train=True, download=True,
                    transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=50, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        dataset_cls('./data', train=False, download=True,
                    transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=10000, shuffle=False, **kwargs)

    # BUGFIX: label i is digit i; the old list started at '1', so every
    # plotted digit label was off by one.
    classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    def show_images(images, labels):
        """Plot a single row of images with their labels (debug helper)."""
        num_img = len(images)
        np_images = [img.numpy() for img in images]
        fig, axes = plt.subplots(nrows=1, ncols=num_img, figsize=(20, 45))
        for i, ax in enumerate(axes.flat):
            ax.set_axis_off()
            im = ax.imshow(np_images[i], vmin=0., vmax=1.)
            ax.set_title(f'{labels[i]}')
        plt.axis("off")
        fig.subplots_adjust(bottom=0.1, top=0.9, left=0.1, right=0.8,
                            wspace=0.1, hspace=0.25)
        plt.show()

    # BUGFIX: iterator.next() is the removed Python-2 spelling; use the
    # builtin next() on the iterator.
    images, labels = next(iter(train_loader))
    num_img_to_plot = 9
    images = [images[i].permute(1, 2, 0) for i in range(num_img_to_plot)]
    labels = [classes[i] for i in labels[:num_img_to_plot]]
    # show_images(images, labels)

    model = LeNet().to(device)
    model_2 = LeNet().to(device)
    if not random_model:
        # One load path for both devices: map to CPU only when CUDA is
        # unavailable (map_location=None keeps torch.load's default).
        map_location = None if use_cuda else 'cpu'
        model.load_state_dict(
            torch.load("checkpoints/" + dataset + "/CNN.pt",
                       map_location=map_location))
        model_2.load_state_dict(
            torch.load("checkpoints/" + dataset + "/mnist_PGD_e_20.pt",
                       map_location=map_location))
    model.eval()
    model_2.eval()

    test_loss, test_acc = test(model, test_loader)
    print(f'Clean \t loss: {test_loss:.4f} \t acc: {test_acc:.4f}')
    return model, model_2, train_loader, test_loader
class Reptile(object):
    """Reptile meta-learning (Nichol et al., 2018) around a LeNet model.

    Inner loop: plain SGD on a sampled task. Outer loop: interpolate the
    weights toward the task-adapted weights with a linearly decayed step.
    """

    def __init__(self, args):
        self.args = args
        self._load_model()
        self.model.to(args.device)
        self.task_generator = TaskGen(args.max_num_classes)
        self.outer_stepsize = args.outer_stepsize
        self.criterion = nn.CrossEntropyLoss()

    def _load_model(self):
        """Create the model; resume weights and the iteration counter from
        ``args.model_path`` (and its ``.iter`` sidecar) when present."""
        self.model = LeNet()
        self.current_iteration = 0
        if os.path.exists(self.args.model_path):
            try:
                print("Loading model from: {}".format(self.args.model_path))
                self.model.load_state_dict(torch.load(self.args.model_path))
                self.current_iteration = joblib.load(
                    "{}.iter".format(self.args.model_path))
            except Exception as e:
                # Best-effort resume: a corrupt or incompatible checkpoint
                # falls back to training from scratch instead of crashing.
                print(
                    "Exception: {}\nCould not load model from {} - starting from scratch"
                    .format(e, self.args.model_path))

    def inner_training(self, x, y, num_iterations):
        """Run SGD on one task.

        Args:
            x, y: task samples/labels (array-like; converted to tensors).
            num_iterations: number of minibatch SGD steps to take.

        Returns:
            Mean loss (float) over the steps actually run.
        """
        x, y = shuffle_unison(x, y)
        self.model.train()
        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        y = torch.tensor(y, dtype=torch.float, device=self.args.device)
        total_loss = 0
        for _ in range(num_iterations):
            # Random contiguous minibatch window.
            start = np.random.randint(
                0, len(x) - self.args.inner_batch_size + 1)
            self.model.zero_grad()
            outputs = self.model(x[start:start + self.args.inner_batch_size])
            # Deprecated Variable() wrapper removed — tensors autograd natively.
            loss = self.criterion(
                outputs, y[start:start + self.args.inner_batch_size].long())
            # BUGFIX: accumulate the Python float, not the tensor; keeping
            # the tensor retained every step's autograd graph in memory.
            total_loss += loss.item()
            loss.backward()
            # Manual SGD update (equivalent to optimizer.step()).
            for param in self.model.parameters():
                param.data -= self.args.inner_stepsize * param.grad.data
        # BUGFIX: average over the iterations actually run, not the
        # configured self.args.inner_iterations (they differ in test()).
        return total_loss / num_iterations

    def _meta_gradient_update(self, iteration, num_classes, weights_before):
        """Interpolate between current weights and trained weights from this
        task; (weights_before - weights_after) is the meta-gradient.

        Args:
            iteration: current iteration - used for updating outer_stepsize.
            num_classes: current classifier number of classes.
            weights_before: state of weights before inner steps training.
        """
        weights_after = self.model.state_dict()
        outer_stepsize = self.outer_stepsize * (
            1 - iteration / self.args.n_iterations)  # linear schedule
        self.model.load_state_dict({
            name: weights_before[name] +
            (weights_after[name] - weights_before[name]) * outer_stepsize
            for name in weights_before
        })

    def meta_training(self):
        """Reptile outer loop: sample a task, inner-train, interpolate.

        Saves the model on completion; on Ctrl-C saves model and the
        iteration counter so training can resume.
        """
        total_loss = 0
        try:
            while self.current_iteration < self.args.n_iterations:
                # Generate task.
                # BUGFIX: read num_classes from self.args instead of
                # relying on a module-level `args` global.
                data, labels, original_labels, num_classes = \
                    self.task_generator.get_train_task(self.args.num_classes)
                weights_before = deepcopy(self.model.state_dict())
                loss = self.inner_training(data, labels,
                                           self.args.inner_iterations)
                total_loss += loss
                if self.current_iteration % self.args.log_every == 0:
                    print("-----------------------------")
                    print("iteration {}".format(self.current_iteration + 1))
                    print("Loss: {:.3f}".format(
                        total_loss / (self.current_iteration + 1)))
                    print("Current task info: ")
                    print("\t- Number of classes: {}".format(num_classes))
                    print("\t- Batch size: {}".format(len(data)))
                    print("\t- Labels: {}".format(set(original_labels)))
                    self.test()
                self._meta_gradient_update(self.current_iteration,
                                           num_classes, weights_before)
                self.current_iteration += 1
            # NOTE(review): checkpoint written after the loop completes;
            # mid-run persistence only happens via the interrupt handler.
            torch.save(self.model.state_dict(), self.args.model_path)
        except KeyboardInterrupt:
            print("Manual Interrupt...")
            print("Saving to: {}".format(self.args.model_path))
            torch.save(self.model.state_dict(), self.args.model_path)
            joblib.dump(self.current_iteration,
                        "{}.iter".format(self.args.model_path),
                        compress=1)

    def predict(self, x):
        """Forward pass in eval mode; returns logits as a numpy array."""
        self.model.eval()
        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        outputs = self.model(x)
        return outputs.cpu().data.numpy()

    def test(self):
        """Evaluate zero-shot then 1-4 shot accuracy on a fixed 5-class task.

        1. Create task from test set.  2. Measure zero-shot accuracy.
        3. For i in 1..4: fine-tune on i enrollment shots and re-measure.
        4. Restore the pre-test weight snapshot.
        """
        test_data, test_labels, _, _ = self.task_generator.get_test_task(
            selected_labels=[1, 2, 3, 4, 5],
            num_samples=-1)  # all available samples
        predicted_labels = np.argmax(self.predict(test_data), axis=1)
        accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100
        print(
            "Accuracy before few shots learning (a.k.a. zero-shot learning): {:.2f}%\n----"
            .format(accuracy))
        weights_before = deepcopy(
            self.model.state_dict())  # save snapshot before evaluation
        for i in range(1, 5):
            enroll_data, enroll_labels, _, _ = \
                self.task_generator.get_enroll_task(
                    selected_labels=[1, 2, 3, 4, 5], num_samples=i)
            self.inner_training(enroll_data, enroll_labels,
                                self.args.inner_iterations_test)
            predicted_labels = np.argmax(self.predict(test_data), axis=1)
            accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100
            print("Accuracy after {} shot{} learning: {:.2f}%)".format(
                i, "" if i == 1 else "s", accuracy))
        self.model.load_state_dict(weights_before)  # restore from snapshot