Example #1
0
def alignment_lenet(augmentations):
    """Compute the kernel target alignment on LeNet. Since the feature map is
    initialized to be random and then trained, unlike kernels where feature map
    is fixed, kernel target alignment doesn't predict the accuracy at all.
    """
    for augmentation in augmentations:
        print(augmentation.name)
        model_base = LeNet().to(device)
        optimizer = sgd_opt_from_model(model_base)
        # Train LeNet for 1 epoch first
        _ = train_all_epochs(train_loader, valid_loader, model_base, optimizer, 1)
        model = LeNetAug().to(device)
        model.load_state_dict(model_base.state_dict())
        loader = loader_from_dataset(augmentation.dataset)
        print(kernel_target_alignment_augmented(loader, model))
Example #2
0
def main():
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, testloader = load_CIFAR(batch_size=BATCH_SIZE,
                                         num_workers=NUM_WORKERS)

    CLASSES = []
    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES = [cls]

        print("==> Creating model")
        model = LeNet(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):

            # decay learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader,
                               model,
                               criterion,
                               CLASSES,
                               CLASSES,
                               optimizer=optimizer,
                               use_cuda=CUDA)
            test_loss = train(testloader,
                              model,
                              criterion,
                              CLASSES,
                              CLASSES,
                              test=True,
                              use_cuda=CUDA)

            # save model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict()
                }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
def main():
    # Data Loader (Input Pipeline)
    print('loading dataset...')
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               num_workers=args.num_workers,
                                               drop_last=False,
                                               shuffle=False)

    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             num_workers=args.num_workers,
                                             drop_last=False,
                                             shuffle=False)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              num_workers=args.num_workers,
                                              drop_last=False,
                                              shuffle=False)
    # Define models
    print('building model...')
    if args.dataset == 'mnist':
        clf1 = LeNet()
    if args.dataset == 'fashionmnist':
        clf1 = resnet.ResNet18_F(10)
    if args.dataset == 'cifar10':
        clf1 = resnet.ResNet34(10)
    if args.dataset == 'svhn':
        clf1 = resnet.ResNet34(10)

    clf1.cuda()
    optimizer = torch.optim.SGD(clf1.parameters(),
                                lr=args.lr,
                                weight_decay=args.weight_decay)

    with open(txtfile, "a") as myfile:
        myfile.write('epoch train_acc val_acc test_acc\n')

    epoch = 0
    train_acc = 0
    val_acc = 0
    # evaluate models with random weights
    test_acc = evaluate(test_loader, clf1)
    print('Epoch [%d/%d] Test Accuracy on the %s test data: Model1 %.4f %%' %
          (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(
            str(int(epoch)) + ' ' + str(train_acc) + ' ' + str(val_acc) + ' ' +
            str(test_acc) + ' ' + "\n")

    best_acc = 0.0
    # training
    for epoch in range(1, args.n_epoch_1):
        # train models
        clf1.train()
        train_acc = train(clf1, train_loader, epoch, optimizer,
                          nn.CrossEntropyLoss())
        # validation
        val_acc = evaluate(val_loader, clf1)
        # evaluate models
        test_acc = evaluate(test_loader, clf1)

        # save results
        print(
            'Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%'
            % (epoch + 1, args.n_epoch_1, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %% ' %
              (epoch + 1, args.n_epoch_1, len(val_dataset), val_acc))
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %% ' %
            (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc) + ' ' + str(val_acc) +
                ' ' + str(test_acc) + ' ' + "\n")

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')

    print('Matrix Factorization is doing...')
    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    A = respresentations_extract(train_loader, clf1, len(train_dataset),
                                 args.dim, batch_size)
    A_val = respresentations_extract(val_loader, clf1, len(val_dataset),
                                     args.dim, batch_size)
    A_total = np.append(A, A_val, axis=0)
    W_total, H_total, error = train_m(A_total, args.basis, args.iteration_nmf,
                                      1e-5)
    for i in range(W_total.shape[0]):
        for j in range(W_total.shape[1]):
            if W_total[i, j] < 1e-6:
                W_total[i, j] = 0.
    W = W_total[0:len(train_dataset), :]
    W_val = W_total[len(train_dataset):, :]
    print('Transition Matrix is estimating...Wating...')
    logits_matrix = probability_extract(train_loader, clf1, len(train_dataset),
                                        args.num_classes, batch_size)
    idx_matrix_group, transition_matrix_group = estimate_matrix(
        logits_matrix, model_save_dir)
    logits_matrix_val = probability_extract(val_loader, clf1, len(val_dataset),
                                            args.num_classes, batch_size)
    idx_matrix_group_val, transition_matrix_group_val = estimate_matrix(
        logits_matrix_val, model_save_dir)
    func = nn.MSELoss()

    model = Matrix_optimize(args.basis, args.num_classes)
    optimizer_1 = torch.optim.Adam(model.parameters(), lr=0.001)
    basis_matrix_group = basis_matrix_optimize(model, optimizer_1, args.basis,
                                               args.num_classes, W,
                                               transition_matrix_group,
                                               idx_matrix_group, func,
                                               model_save_dir, args.n_epoch_4)

    basis_matrix_group_val = basis_matrix_optimize(
        model, optimizer_1, args.basis, args.num_classes, W_val,
        transition_matrix_group_val, idx_matrix_group_val, func,
        model_save_dir, args.n_epoch_4)

    for i in range(basis_matrix_group.shape[0]):
        for j in range(basis_matrix_group.shape[1]):
            for k in range(basis_matrix_group.shape[2]):
                if basis_matrix_group[i, j, k] < 1e-6:
                    basis_matrix_group[i, j, k] = 0.

    optimizer_ = torch.optim.SGD(clf1.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay,
                                 momentum=args.momentum)

    best_acc = 0.0
    for epoch in range(1, args.n_epoch_2):
        # train model
        clf1.train()

        train_acc = train_correction(clf1, train_loader, epoch, optimizer_, W,
                                     basis_matrix_group, batch_size,
                                     args.num_classes, args.basis)
        # validation
        val_acc = val_correction(clf1, val_loader, epoch, W_val,
                                 basis_matrix_group_val, batch_size,
                                 args.num_classes, args.basis)

        # evaluate models
        test_acc = evaluate(test_loader, clf1)
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc) + ' ' + str(val_acc) +
                ' ' + str(test_acc) + ' ' + "\n")
        # save results
        print(
            'Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%'
            % (epoch + 1, args.n_epoch_2, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %% ' %
              (epoch + 1, args.n_epoch_2, len(val_dataset), val_acc))
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %% ' %
            (epoch + 1, args.n_epoch_2, len(test_dataset), test_acc))

    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    optimizer_r = torch.optim.Adam(clf1.parameters(),
                                   lr=args.lr_revision,
                                   weight_decay=args.weight_decay)
    nn.init.constant_(clf1.T_revision.weight, 0.0)

    for epoch in range(1, args.n_epoch_3):
        # train models
        clf1.train()
        train_acc = train_revision(clf1, train_loader, epoch, optimizer_r, W,
                                   basis_matrix_group, batch_size,
                                   args.num_classes, args.basis)
        # validation
        val_acc = val_revision(clf1, val_loader, epoch, W_val,
                               basis_matrix_group, batch_size,
                               args.num_classes, args.basis)
        # evaluate models
        test_acc = evaluate(test_loader, clf1)
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc) + ' ' + str(val_acc) +
                ' ' + str(test_acc) + ' ' + "\n")

        # save results
        print(
            'Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%'
            % (epoch + 1, args.n_epoch_3, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %% ' %
              (epoch + 1, args.n_epoch_3, len(val_dataset), val_acc))
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %% ' %
            (epoch + 1, args.n_epoch_3, len(test_dataset), test_acc))
    trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    dataset_train = MNIST('./data/mnist/', train=True, download=True, transform=trans_mnist)
    dataset_test = MNIST('./data/mnist/', train=False, download=True, transform=trans_mnist)
    # sample users
    dict_users = split_noniid_shuffle(dataset_train, args.num_nodes)

    img_size = dataset_train[0][0].shape
    print(img_size)

    net_glob = LeNet().to(args.device)
    print(net_glob.fc1.weight.type())
    print(net_glob)
    net_glob.train()

    # copy weights
    w_glob = net_glob.state_dict()
    w_glob_grad = w_glob

    # training
    #loss_train = []
    
    w_locals = [w_glob for i in range(args.num_nodes)]

    for iter in range(args.epochs):
        loss_locals = []
        for idx in range(args.num_nodes):
            #import pdb; pdb.set_trace()
            local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device), global_grad_data=w_glob_grad)
            w_locals[idx] = copy.deepcopy(w)
            loss_locals.append(copy.deepcopy(loss))
Example #5
0
            top1.add(output[0], target)
            test_loss.add(loss.item())
        t1 = top1.value()[0]
        l = test_loss.value()[0]

    # Report results
    print('[Epoch %2d] Average test loss: %.3f, error: %.2f%%' % (e, l, t1))
    print('%28s: %.3f, error: %.2f%%\n' % ('training loss', lt, t1t))

    return test_loss.value()[0], top1.value()


## Run the above functions

save_path = 'log/'
save_model_path = os.path.join(save_path, 'checkpoint.pth.tar')

for e in range(100):

    train_loss = train(e)
    loss, pct_err = test(e)

    torch.save(
        {
            'epoch': e + 1,
            'state_dict': model.state_dict(),
            'pct_err': pct_err,
            'loss': loss,
            'optimizer': optimizer.state_dict()
        }, save_model_path)
class Reptile(object):
    def __init__(self, args):
        self.args = args
        self._load_model()

        self.model.to(args.device)
        self.task_generator = TaskGen(args.max_num_classes)
        self.outer_stepsize = args.outer_stepsize
        self.criterion = nn.CrossEntropyLoss()
        # self.optimizer = optim.Adam(self.model.parameters(), lr=args.inner_stepsize)

    def _load_model(self):
        self.model = LeNet()
        self.current_iteration = 0
        if os.path.exists(self.args.model_path):
            try:
                print("Loading model from: {}".format(self.args.model_path))
                self.model.load_state_dict(torch.load(self.args.model_path))
                self.current_iteration = joblib.load("{}.iter".format(
                    self.args.model_path))
            except Exception as e:
                print(
                    "Exception: {}\nCould not load model from {} - starting from scratch"
                    .format(e, self.args.model_path))

    def inner_training(self, x, y, num_iterations):
        """
        Run training on task
        """
        x, y = shuffle_unison(x, y)

        self.model.train()

        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        y = torch.tensor(y, dtype=torch.float, device=self.args.device)

        total_loss = 0
        for _ in range(num_iterations):
            start = np.random.randint(0,
                                      len(x) - self.args.inner_batch_size + 1)

            self.model.zero_grad()
            # self.optimizer.zero_grad()
            outputs = self.model(x[start:start + self.args.inner_batch_size])
            # print("output: {} - y: {}".format(outputs.shape, y.shape))
            loss = self.criterion(
                outputs,
                Variable(y[start:start + self.args.inner_batch_size].long()))
            total_loss += loss
            loss.backward()
            # self.optimizer.step()
            # Similar to calling optimizer.step()
            for param in self.model.parameters():
                param.data -= self.args.inner_stepsize * param.grad.data
        return total_loss / self.args.inner_iterations

    def _meta_gradient_update(self, iteration, num_classes, weights_before):
        """
        Interpolate between current weights and trained weights from this task
        I.e. (weights_before - weights_after) is the meta-gradient

            - iteration: current iteration - used for updating outer_stepsize
            - num_classes: current classifier number of classes
            - weights_before: state of weights before inner steps training
        """
        weights_after = self.model.state_dict()
        outer_stepsize = self.outer_stepsize * (
            1 - iteration / self.args.n_iterations)  # linear schedule

        self.model.load_state_dict({
            name: weights_before[name] +
            (weights_after[name] - weights_before[name]) * outer_stepsize
            for name in weights_before
        })

    def meta_training(self):
        # Reptile training loop
        total_loss = 0
        try:
            while self.current_iteration < self.args.n_iterations:
                # Generate task
                data, labels, original_labels, num_classes = self.task_generator.get_train_task(
                    args.num_classes)

                weights_before = deepcopy(self.model.state_dict())
                loss = self.inner_training(data, labels,
                                           self.args.inner_iterations)
                total_loss += loss
                if self.current_iteration % self.args.log_every == 0:
                    print("-----------------------------")
                    print("iteration               {}".format(
                        self.current_iteration + 1))
                    print("Loss: {:.3f}".format(total_loss /
                                                (self.current_iteration + 1)))
                    print("Current task info: ")
                    print("\t- Number of classes: {}".format(num_classes))
                    print("\t- Batch size: {}".format(len(data)))
                    print("\t- Labels: {}".format(set(original_labels)))

                    self.test()

                self._meta_gradient_update(self.current_iteration, num_classes,
                                           weights_before)

                self.current_iteration += 1

            torch.save(self.model.state_dict(), self.args.model_path)

        except KeyboardInterrupt:
            print("Manual Interrupt...")
            print("Saving to: {}".format(self.args.model_path))
            torch.save(self.model.state_dict(), self.args.model_path)
            joblib.dump(self.current_iteration,
                        "{}.iter".format(self.args.model_path),
                        compress=1)

    def predict(self, x):
        self.model.eval()
        x = torch.tensor(x, dtype=torch.float, device=self.args.device)
        outputs = self.model(x)
        return outputs.cpu().data.numpy()

    def test(self):
        """
        Run tests
            1. Create task from test set.
            2. Reload model
            3. Check accuracy on test set
            4. Train for one or more iterations on one task
            5. Check accuracy again on test set
        """

        test_data, test_labels, _, _ = self.task_generator.get_test_task(
            selected_labels=[1, 2, 3, 4,
                             5], num_samples=-1)  # all available samples
        predicted_labels = np.argmax(self.predict(test_data), axis=1)
        accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100
        print(
            "Accuracy before few shots learning (a.k.a. zero-shot learning): {:.2f}%\n----"
            .format(accuracy))

        weights_before = deepcopy(
            self.model.state_dict())  # save snapshot before evaluation
        for i in range(1, 5):
            enroll_data, enroll_labels, _, _ = self.task_generator.get_enroll_task(
                selected_labels=[1, 2, 3, 4, 5], num_samples=i)
            self.inner_training(enroll_data, enroll_labels,
                                self.args.inner_iterations_test)
            predicted_labels = np.argmax(self.predict(test_data), axis=1)
            accuracy = np.mean(1 * (predicted_labels == test_labels)) * 100

            print("Accuracy after {} shot{} learning: {:.2f}%)".format(
                i, "" if i == 1 else "s", accuracy))

        self.model.load_state_dict(weights_before)  # restore from snapshot
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

model = LeNet()
model.load_state_dict(
    torch.load('models/lenet_pretrained.pkl', map_location='cpu'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
print("--- Accuracy of Pretrained Model ---")
test(model, loader_test)

# pruning
masks = lenet_prune()
model.set_masks(masks)
print("--- Accuracy After Pruning ---")
test(model, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(model, criterion, optimizer, param, loader_train)

print("--- Accuracy After Retraining ---")
test(model, loader_test)
prune_rate(model)

# Save and load the entire model
torch.save(model.state_dict(), 'models/lenet_pruned.pkl')
Example #8
0
# optimizer & Loss
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

for epoch in range(EPOCH):
    for step, (images, labels) in enumerate(train_loader):

        images = Variable(images)
        labels = Variable(labels)

        optimizer.zero_grad()

        prediction = net(images)
        loss = loss_func(prediction, labels)
        loss.backward()
        optimizer.step()

        if step % 50 == 0:
            test_prediction = net(test_images)
            _, predicted = torch.max(test_prediction.data, 1)
            test_accuracy = sum(predicted == test_labels) / float(
                test_labels.size(0))

            print(
                'Epoch: %d, Step: %d, Training Loss: %.4f, Test Accuracy: %.3f'
                % (epoch, step, loss.data[0], test_accuracy))

            torch.save(net.state_dict(),
                       'saves/dogcat_lenet_params.pkl')  # 只保存网络中的参数(速度快,占内存少)