def alignment_lenet(augmentations):
    """Compute the kernel target alignment on LeNet.

    Since the feature map is initialized randomly and then trained, unlike
    kernels where the feature map is fixed, kernel target alignment does not
    predict the accuracy at all.
    """
    for augmentation in augmentations:
        print(augmentation.name)
        model_base = LeNet().to(device)
        optimizer = sgd_opt_from_model(model_base)
        # Train LeNet for 1 epoch first
        _ = train_all_epochs(train_loader, valid_loader, model_base, optimizer, 1)
        model = LeNetAug().to(device)
        model.load_state_dict(model_base.state_dict())
        loader = loader_from_dataset(augmentation.dataset)
        print(kernel_target_alignment_augmented(loader, model))
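# kernel_target_alignment_augmented comes from the surrounding codebase and is
# not shown here. As a reminder of the quantity being reported, a minimal
# sketch of kernel-target alignment for a linear kernel on extracted features
# could look like the following (the +/-1 binary labels and the plain linear
# kernel are simplifying assumptions, not the repo's exact setup):
import torch

def kernel_target_alignment(features, labels):
    """<K, yy^T>_F / (||K||_F * ||yy^T||_F) for a linear kernel K = F F^T.

    features: (n, d) tensor of feature-map outputs, labels: (n,) tensor in {-1, +1}.
    """
    K = features @ features.t()                       # (n, n) kernel matrix
    Y = torch.outer(labels.float(), labels.float())   # (n, n) target matrix yy^T
    return ((K * Y).sum() / (K.norm() * Y.norm())).item()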
def main():
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')
    trainloader, testloader = load_CIFAR(batch_size=BATCH_SIZE,
                                         num_workers=NUM_WORKERS)

    CLASSES = []
    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):
        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))
        CLASSES = [cls]

        print("==> Creating model")
        model = LeNet(num_classes=1)
        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):
            # Decay the learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader, model, criterion, CLASSES, CLASSES,
                               optimizer=optimizer, use_cuda=CUDA)
            test_loss = train(testloader, model, criterion, CLASSES, CLASSES,
                              test=True, use_cuda=CUDA)

            # Save the model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict()
                }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
def main():
    # Data loaders (input pipeline)
    print('loading dataset...')
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               num_workers=args.num_workers,
                                               drop_last=False,
                                               shuffle=False)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             num_workers=args.num_workers,
                                             drop_last=False,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              num_workers=args.num_workers,
                                              drop_last=False,
                                              shuffle=False)

    # Define the model
    print('building model...')
    if args.dataset == 'mnist':
        clf1 = LeNet()
    if args.dataset == 'fashionmnist':
        clf1 = resnet.ResNet18_F(10)
    if args.dataset == 'cifar10':
        clf1 = resnet.ResNet34(10)
    if args.dataset == 'svhn':
        clf1 = resnet.ResNet34(10)
    clf1.cuda()

    optimizer = torch.optim.SGD(clf1.parameters(),
                                lr=args.lr,
                                weight_decay=args.weight_decay)

    with open(txtfile, "a") as myfile:
        myfile.write('epoch train_acc val_acc test_acc\n')

    epoch = 0
    train_acc = 0
    val_acc = 0

    # Evaluate the model with random weights
    test_acc = evaluate(test_loader, clf1)
    print('Epoch [%d/%d] Test Accuracy on the %s test data: Model1 %.4f %%' %
          (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))

    # Save results
    with open(txtfile, "a") as myfile:
        myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                     str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")

    best_acc = 0.0
    # Stage 1: standard training
    for epoch in range(1, args.n_epoch_1):
        # Train the model
        clf1.train()
        train_acc = train(clf1, train_loader, epoch, optimizer,
                          nn.CrossEntropyLoss())
        # Validate
        val_acc = evaluate(val_loader, clf1)
        # Evaluate on the test set
        test_acc = evaluate(test_loader, clf1)

        # Save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')

    print('Running matrix factorization...')
    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    A = respresentations_extract(train_loader, clf1, len(train_dataset),
                                 args.dim, batch_size)
    A_val = respresentations_extract(val_loader, clf1, len(val_dataset),
                                     args.dim, batch_size)
    A_total = np.append(A, A_val, axis=0)
    W_total, H_total, error = train_m(A_total, args.basis, args.iteration_nmf, 1e-5)
    # Zero out negligible factor weights
    for i in range(W_total.shape[0]):
        for j in range(W_total.shape[1]):
            if W_total[i, j] < 1e-6:
                W_total[i, j] = 0.
    W = W_total[0:len(train_dataset), :]
    W_val = W_total[len(train_dataset):, :]

    print('Estimating the transition matrix... Waiting...')
    logits_matrix = probability_extract(train_loader, clf1, len(train_dataset),
                                        args.num_classes, batch_size)
    idx_matrix_group, transition_matrix_group = estimate_matrix(
        logits_matrix, model_save_dir)
    logits_matrix_val = probability_extract(val_loader, clf1, len(val_dataset),
                                            args.num_classes, batch_size)
    idx_matrix_group_val, transition_matrix_group_val = estimate_matrix(
        logits_matrix_val, model_save_dir)

    func = nn.MSELoss()
    model = Matrix_optimize(args.basis, args.num_classes)
    optimizer_1 = torch.optim.Adam(model.parameters(), lr=0.001)
    basis_matrix_group = basis_matrix_optimize(model, optimizer_1, args.basis,
                                               args.num_classes, W,
                                               transition_matrix_group,
                                               idx_matrix_group, func,
                                               model_save_dir, args.n_epoch_4)
    basis_matrix_group_val = basis_matrix_optimize(model, optimizer_1, args.basis,
                                                   args.num_classes, W_val,
                                                   transition_matrix_group_val,
                                                   idx_matrix_group_val, func,
                                                   model_save_dir, args.n_epoch_4)

    # Zero out negligible entries in the basis matrices
    for i in range(basis_matrix_group.shape[0]):
        for j in range(basis_matrix_group.shape[1]):
            for k in range(basis_matrix_group.shape[2]):
                if basis_matrix_group[i, j, k] < 1e-6:
                    basis_matrix_group[i, j, k] = 0.

    optimizer_ = torch.optim.SGD(clf1.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay,
                                 momentum=args.momentum)
    best_acc = 0.0
    # Stage 2: training with loss correction
    for epoch in range(1, args.n_epoch_2):
        # Train the model
        clf1.train()
        train_acc = train_correction(clf1, train_loader, epoch, optimizer_, W,
                                     basis_matrix_group, batch_size,
                                     args.num_classes, args.basis)
        # Validate
        val_acc = val_correction(clf1, val_loader, epoch, W_val,
                                 basis_matrix_group_val, batch_size,
                                 args.num_classes, args.basis)
        # Evaluate on the test set
        test_acc = evaluate(test_loader, clf1)

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')

        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")

        # Save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(test_dataset), test_acc))

    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    optimizer_r = torch.optim.Adam(clf1.parameters(),
                                   lr=args.lr_revision,
                                   weight_decay=args.weight_decay)
    nn.init.constant_(clf1.T_revision.weight, 0.0)

    # Stage 3: training with transition-matrix revision
    for epoch in range(1, args.n_epoch_3):
        # Train the model
        clf1.train()
        train_acc = train_revision(clf1, train_loader, epoch, optimizer_r, W,
                                   basis_matrix_group, batch_size,
                                   args.num_classes, args.basis)
        # Validate
        val_acc = val_revision(clf1, val_loader, epoch, W_val,
                               basis_matrix_group, batch_size,
                               args.num_classes, args.basis)
        # Evaluate on the test set
        test_acc = evaluate(test_loader, clf1)

        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")

        # Save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(test_dataset), test_acc))
trans_mnist = transforms.Compose([transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))])
dataset_train = MNIST('./data/mnist/', train=True, download=True,
                      transform=trans_mnist)
dataset_test = MNIST('./data/mnist/', train=False, download=True,
                     transform=trans_mnist)

# Sample users (non-IID split of the training set across nodes)
dict_users = split_noniid_shuffle(dataset_train, args.num_nodes)
img_size = dataset_train[0][0].shape
print(img_size)

net_glob = LeNet().to(args.device)
print(net_glob.fc1.weight.type())
print(net_glob)
net_glob.train()

# Copy weights
w_glob = net_glob.state_dict()
w_glob_grad = w_glob

# Training
w_locals = [w_glob for i in range(args.num_nodes)]
for iter in range(args.epochs):
    loss_locals = []
    for idx in range(args.num_nodes):
        local = LocalUpdate(args=args, dataset=dataset_train,
                            idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device),
                              global_grad_data=w_glob_grad)
        w_locals[idx] = copy.deepcopy(w)
        loss_locals.append(copy.deepcopy(loss))
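# The aggregation step is not shown in this fragment. A standard FedAvg-style
# average of the collected local state dicts (a sketch, assuming every node
# participates with equal weight) could look like this:
w_avg = copy.deepcopy(w_locals[0])
for key in w_avg.keys():
    for i in range(1, len(w_locals)):
        w_avg[key] += w_locals[i][key]
    w_avg[key] = torch.div(w_avg[key], len(w_locals))
net_glob.load_state_dict(w_avg)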
        top1.add(output[0], target)
        test_loss.add(loss.item())

    t1 = top1.value()[0]
    l = test_loss.value()[0]

    # Report results
    print('[Epoch %2d] Average test loss: %.3f, error: %.2f%%' % (e, l, t1))
    print('%28s: %.3f, error: %.2f%%\n' % ('training loss', lt, t1t))
    return test_loss.value()[0], top1.value()


## Run the above functions
save_path = 'log/'
save_model_path = os.path.join(save_path, 'checkpoint.pth.tar')

for e in range(100):
    train_loss = train(e)
    loss, pct_err = test(e)
    torch.save(
        {
            'epoch': e + 1,
            'state_dict': model.state_dict(),
            'pct_err': pct_err,
            'loss': loss,
            'optimizer': optimizer.state_dict()
        }, save_model_path)
class Reptile(object):
    def __init__(self, args):
        self.args = args
        self._load_model()
        self.model.to(args.device)
        self.task_generator = TaskGen(args.max_num_classes)
        self.outer_stepsize = args.outer_stepsize
        self.criterion = nn.CrossEntropyLoss()
        # self.optimizer = optim.Adam(self.model.parameters(), lr=args.inner_stepsize)

    def _load_model(self):
        self.model = LeNet()
        self.current_iteration = 0
        if os.path.exists(self.args.model_path):
            try:
                print("Loading model from: {}".format(self.args.model_path))
                self.model.load_state_dict(torch.load(self.args.model_path))
                self.current_iteration = joblib.load("{}.iter".format(self.args.model_path))
            except Exception as e:
                print("Exception: {}\nCould not load model from {} - starting from scratch"
                      .format(e, self.args.model_path))

    def inner_training(self, x, y, num_iterations):
        """Run training on one task."""
        x, y = shuffle_unison(x, y)

        self.model.train()
        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        y = torch.tensor(y, dtype=torch.float, device=self.args.device)

        total_loss = 0
        for _ in range(num_iterations):
            start = np.random.randint(0, len(x) - self.args.inner_batch_size + 1)

            self.model.zero_grad()
            # self.optimizer.zero_grad()
            outputs = self.model(x[start:start + self.args.inner_batch_size])
            loss = self.criterion(outputs,
                                  y[start:start + self.args.inner_batch_size].long())
            total_loss += loss
            loss.backward()
            # self.optimizer.step()

            # Manual SGD step (similar to calling optimizer.step())
            for param in self.model.parameters():
                param.data -= self.args.inner_stepsize * param.grad.data

        # Average over the iterations actually run
        return total_loss / num_iterations

    def _meta_gradient_update(self, iteration, num_classes, weights_before):
        """Interpolate between current weights and trained weights from this task,
        i.e. (weights_before - weights_after) is the meta-gradient.

        - iteration: current iteration, used for annealing outer_stepsize
        - num_classes: number of classes of the current classifier
        - weights_before: state of the weights before inner-loop training
        """
        weights_after = self.model.state_dict()
        outer_stepsize = self.outer_stepsize * (
            1 - iteration / self.args.n_iterations)  # linear schedule

        self.model.load_state_dict({
            name: weights_before[name] +
            (weights_after[name] - weights_before[name]) * outer_stepsize
            for name in weights_before
        })

    def meta_training(self):
        # Reptile training loop
        total_loss = 0
        try:
            while self.current_iteration < self.args.n_iterations:
                # Generate a task
                data, labels, original_labels, num_classes = \
                    self.task_generator.get_train_task(self.args.num_classes)

                weights_before = deepcopy(self.model.state_dict())
                loss = self.inner_training(data, labels, self.args.inner_iterations)
                total_loss += loss

                if self.current_iteration % self.args.log_every == 0:
                    print("-----------------------------")
                    print("iteration {}".format(self.current_iteration + 1))
                    print("Loss: {:.3f}".format(total_loss / (self.current_iteration + 1)))
                    print("Current task info: ")
                    print("\t- Number of classes: {}".format(num_classes))
                    print("\t- Batch size: {}".format(len(data)))
                    print("\t- Labels: {}".format(set(original_labels)))
                    self.test()

                self._meta_gradient_update(self.current_iteration, num_classes,
                                           weights_before)

                self.current_iteration += 1

            torch.save(self.model.state_dict(), self.args.model_path)
        except KeyboardInterrupt:
            print("Manual Interrupt...")
            print("Saving to: {}".format(self.args.model_path))
            torch.save(self.model.state_dict(), self.args.model_path)
            joblib.dump(self.current_iteration,
                        "{}.iter".format(self.args.model_path), compress=1)

    def predict(self, x):
        self.model.eval()
        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        outputs = self.model(x)
        return outputs.cpu().data.numpy()

    def test(self):
        """Run tests:
        1. Create a task from the test set.
        2. Reload the model.
        3. Check accuracy on the test set (zero-shot).
        4. Train for one or more iterations on one task.
        5. Check accuracy again on the test set.
        """
        test_data, test_labels, _, _ = self.task_generator.get_test_task(
            selected_labels=[1, 2, 3, 4, 5], num_samples=-1)  # all available samples
        predicted_labels = np.argmax(self.predict(test_data), axis=1)
        accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100
        print("Accuracy before few-shot learning (a.k.a. zero-shot learning): {:.2f}%\n----"
              .format(accuracy))

        # Save a snapshot before evaluation so meta-training can resume from it
        weights_before = deepcopy(self.model.state_dict())
        for i in range(1, 5):
            enroll_data, enroll_labels, _, _ = self.task_generator.get_enroll_task(
                selected_labels=[1, 2, 3, 4, 5], num_samples=i)
            self.inner_training(enroll_data, enroll_labels,
                                self.args.inner_iterations_test)
            predicted_labels = np.argmax(self.predict(test_data), axis=1)
            accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100
            print("Accuracy after {} shot{} learning: {:.2f}%".format(
                i, "" if i == 1 else "s", accuracy))

        self.model.load_state_dict(weights_before)  # restore from snapshot
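# The inner loop above relies on a shuffle_unison helper that is not shown in
# this snippet. A minimal sketch of what it presumably does (shuffling both
# arrays with a single shared permutation) could look like this, assuming
# NumPy arrays of equal length:
import numpy as np

def shuffle_unison(x, y):
    """Shuffle two equal-length arrays with the same random permutation."""
    assert len(x) == len(y)
    perm = np.random.permutation(len(x))
    return x[perm], y[perm]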
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

model = LeNet()
model.load_state_dict(
    torch.load('models/lenet_pretrained.pkl', map_location='cpu'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print("--- Accuracy of Pretrained Model ---")
test(model, loader_test)

# Pruning
masks = lenet_prune()
model.set_masks(masks)
print("--- Accuracy After Pruning ---")
test(model, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(model, criterion, optimizer, param, loader_train)

print("--- Accuracy After Retraining ---")
test(model, loader_test)
prune_rate(model)

# Save the pruned model's parameters
torch.save(model.state_dict(), 'models/lenet_pruned.pkl')
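# lenet_prune() and model.set_masks() come from the surrounding pruning
# codebase and are not defined in this snippet. As a rough sketch under that
# assumption, a magnitude-based mask builder (keep_ratio and the per-layer
# thresholding are illustrative choices, not necessarily the repo's scheme)
# might look like this:
import torch

def magnitude_masks(model, keep_ratio=0.1):
    """Binary masks keeping the largest-magnitude weights of each layer.

    Returns one mask per weight tensor, in the order model.parameters() yields them.
    """
    masks = []
    for param in model.parameters():
        if param.dim() > 1:  # prune only weight matrices / conv kernels, not biases
            k = max(1, int(param.numel() * keep_ratio))
            threshold = param.abs().flatten().topk(k).values.min()
            masks.append((param.abs() >= threshold).float())
    return masks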
# Optimizer & loss
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

for epoch in range(EPOCH):
    for step, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        prediction = net(images)
        loss = loss_func(prediction, labels)
        loss.backward()
        optimizer.step()

        if step % 50 == 0:
            test_prediction = net(test_images)
            _, predicted = torch.max(test_prediction.data, 1)
            test_accuracy = (predicted == test_labels).sum().item() / float(test_labels.size(0))
            print('Epoch: %d, Step: %d, Training Loss: %.4f, Test Accuracy: %.3f' %
                  (epoch, step, loss.item(), test_accuracy))

# Save only the network parameters (faster and uses less memory)
torch.save(net.state_dict(), 'saves/dogcat_lenet_params.pkl')