Example #1
def run(dataset, net_type):

    # Hyperparameter settings
    layer_type = cfg.layer_type
    activation_type = cfg.activation_type

    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size
    beta_type = cfg.beta_type

    trainset, testset, inputs, outputs = data.getDataset(dataset)
    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs, layer_type,
                   activation_type).to(device)

    ckpt_dir = f'checkpoints/{dataset}/bayesian'
    ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}.pt'

    os.makedirs(ckpt_dir, exist_ok=True)

    criterion = metrics.ELBO(len(trainset)).to(device)
    optimizer = Adam(net.parameters(), lr=lr_start)
    lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer,
                                              patience=6,
                                              verbose=True)
    valid_loss_min = np.inf
    for epoch in range(n_epochs):  # loop over the dataset multiple times
        cfg.curr_epoch_no = epoch

        train_loss, train_acc, train_kl = train_model(net,
                                                      optimizer,
                                                      criterion,
                                                      train_loader,
                                                      num_ens=train_ens,
                                                      beta_type=beta_type)
        valid_loss, valid_acc = validate_model(net,
                                               criterion,
                                               valid_loader,
                                               num_ens=valid_ens)
        lr_sched.step(valid_loss)

        print(
            'Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f} \ttrain_kl_div: {:.4f}'
            .format(epoch, train_loss, train_acc, valid_loss, valid_acc,
                    train_kl))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_min, valid_loss))
            torch.save(net.state_dict(), ckpt_name)
            valid_loss_min = valid_loss
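
The ELBO criterion used above lives in the external metrics module and is not shown here. A minimal sketch of what it plausibly computes, following the standard Bayes-by-Backprop objective; the forward signature is an assumption, not confirmed by the snippet:

import torch.nn as nn
import torch.nn.functional as F

class ELBO(nn.Module):
    # Hedged sketch: mean data-fit term scaled up to the full training-set
    # size, plus a beta-weighted KL divergence collected from Bayesian layers.
    def __init__(self, train_size):
        super().__init__()
        self.train_size = train_size

    def forward(self, outputs, targets, kl, beta):
        # outputs are log-probabilities; kl is the summed KL over all layers
        return F.nll_loss(outputs, targets, reduction='mean') * self.train_size + beta * kl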
Example #2
def run(dataset, net_type, train=True):

    # Hyperparameter settings
    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    test_ens = cfg.test_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size

    trainset, testset, inputs, outputs = data.getDataset_regression(dataset)

    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs).to(device)

    print(len(train_loader))
    print(len(valid_loader))
    print(len(test_loader))

    ckpt_dir = f'checkpoints/regression/{dataset}/bayesian'
    ckpt_name = f'checkpoints/regression/{dataset}/bayesian/model_{net_type}.pt'

    os.makedirs(ckpt_dir, exist_ok=True)

    criterion = metrics.ELBO_regression_hetero(len(trainset)).to(device)

    if train:
        optimizer = Adam(net.parameters(), lr=lr_start)
        valid_loss_min = np.inf
        for epoch in range(n_epochs):  # loop over the dataset multiple times
            cfg.curr_epoch_no = epoch
            utils.adjust_learning_rate(optimizer, metrics.lr_linear(epoch, 0, n_epochs, lr_start))

            train_loss, train_mse, train_kl = train_model(net, optimizer, criterion, train_loader, num_ens=train_ens)
            valid_loss, valid_mse = validate_model(net, criterion, valid_loader, num_ens=valid_ens)

            print('Epoch: {} \tTraining Loss: {:.4f} \tTraining MSE: {:.4f} \tValidation Loss: {:.4f} \tValidation MSE: {:.4f} \ttrain_kl_div: {:.4f}'.format(
                epoch, train_loss, train_mse, valid_loss, valid_mse, train_kl))

            # save model if validation loss has decreased
            if valid_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(net.state_dict(), ckpt_name)
                valid_loss_min = valid_loss

    # test saved model
    best_model = getModel(net_type, inputs, outputs).to(device)
    best_model.load_state_dict(torch.load(ckpt_name, map_location=device))
    test_loss, test_mse = test_model(best_model, criterion, test_loader, num_ens=test_ens)
    print('Test Loss: {:.4f} \tTest MSE: {:.4f} '.format(
            test_loss, test_mse))
    test_uncertainty(best_model, testset[:100], data='ccpp')
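
utils.adjust_learning_rate and metrics.lr_linear are external helpers. A minimal sketch of what they plausibly do, assuming a linear decay of the learning rate from lr_start to zero over the run (names and exact behavior are assumptions):

def lr_linear(epoch, start_epoch, n_epochs, lr_start):
    # Linearly anneal from lr_start at start_epoch down to 0 at n_epochs.
    return lr_start * (1.0 - (epoch - start_epoch) / float(n_epochs - start_epoch))

def adjust_learning_rate(optimizer, lr):
    # Write the new learning rate into every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr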
Example #3
def get_splitmnist_dataloaders(num_tasks, return_datasets=False):
    loaders = []
    datasets = _get_splitmnist_datasets(num_tasks)
    for i in range(1, num_tasks + 1):
        trainset, testset, _, _ = datasets[i-1]
        curr_loaders = data.getDataloader(
            trainset, testset, cfg.valid_size, cfg.batch_size, cfg.num_workers)
        loaders.append(curr_loaders)  # (train_loader, valid_loader, test_loader)
    if return_datasets:
        return loaders, datasets
    return loaders
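
A quick usage sketch, unpacking the per-task loader triples; the task count of 5 is an assumption, chosen to match the usual Split-MNIST setup:

loaders = get_splitmnist_dataloaders(5)
for task_id, (train_loader, valid_loader, test_loader) in enumerate(loaders, start=1):
    print(f'Task {task_id}: {len(train_loader)} train batches, '
          f'{len(valid_loader)} valid batches, {len(test_loader)} test batches')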
Example #4
def run(dataset, net_type):

    # Hyperparameter settings
    n_epochs = cfg.n_epochs
    lr = cfg.lr
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size

    trainset, testset, inputs, outputs = data.getDataset(dataset)
    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs).to(device)

    ckpt_dir = f'checkpoints/{dataset}/frequentist'
    ckpt_name = f'checkpoints/{dataset}/frequentist/model_{net_type}.pt'

    os.makedirs(ckpt_dir, exist_ok=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(net.parameters(), lr=lr)
    lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer,
                                              patience=6,
                                              verbose=True)
    valid_loss_min = np.inf
    for epoch in range(1, n_epochs + 1):

        train_loss, train_acc = train_model(net, optimizer, criterion,
                                            train_loader)
        valid_loss, valid_acc = validate_model(net, criterion, valid_loader)
        lr_sched.step(valid_loss)

        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)

        print(
            'Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f}'
            .format(epoch, train_loss, train_acc, valid_loss, valid_acc))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_min, valid_loss))
            torch.save(net.state_dict(), ckpt_name)
            valid_loss_min = valid_loss
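
Unlike the Bayesian examples above, this snippet saves the best checkpoint but never reloads it. A minimal sketch of restoring the best model for evaluation, assuming the same getModel signature and device as above:

best_model = getModel(net_type, inputs, outputs).to(device)
best_model.load_state_dict(torch.load(ckpt_name, map_location=device))
best_model.eval()  # disable dropout/batch-norm updates for evaluation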
Example #5
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


if __name__ == "__main__":
    train_set, test_set, inputs, num_classes = data.getDataset()
    train_loader, valid_loader, test_loader = data.getDataloader(
        train_set, test_set, param.valid_size, param.batch_size_train,
        param.batch_size_test, param.num_workers)
    network = Net()

    optimizer = optim.SGD(network.parameters(),
                          lr=param.learning_rate,
                          momentum=param.momentum)
    test()
    for epoch in range(1, param.n_epochs + 1):
        train(epoch)
        test()
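
train(epoch) is called but not defined in this snippet. A minimal sketch consistent with the NLL loss and SGD optimizer above; the logging interval is an assumption:

def train(epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)          # network outputs log-probabilities
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()))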
Example #6
def run(dataset, net_type, train=True):

    # Hyperparameter settings
    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    test_ens = cfg.test_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size

    trainset, testset, inputs, outputs = data.getDataset_regression(dataset)

    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs).to(device)

    print(len(train_loader))
    print(len(valid_loader))
    print(len(test_loader))

    ckpt_dir = f'checkpoints/regression/{dataset}/' + name
    ckpt_name = ckpt_dir + f'/model_{net_type}.pt'

    os.makedirs(ckpt_dir, exist_ok=True)

    criterion = metrics.ELBO_regression(len(trainset)).to(device)
    # criterion = metrics.ELBO_regression(len(train_loader)).to(device)

    kl_cost_train = np.zeros(n_epochs)
    pred_cost_train = np.zeros(n_epochs)
    mse_train = np.zeros(n_epochs)

    kl_cost_val = np.zeros(n_epochs)
    pred_cost_val = np.zeros(n_epochs)
    mse_val = np.zeros(n_epochs)

    if train:
        optimizer = Adam(net.parameters(), lr=lr_start)
        valid_loss_min = np.inf
        for epoch in range(n_epochs):  # loop over the dataset multiple times
            cfg.curr_epoch_no = epoch
            utils.adjust_learning_rate(optimizer, metrics.lr_linear(epoch, 0, n_epochs, lr_start))

            train_loss, train_mse, train_kl, train_pred = train_model(net, optimizer, criterion, train_loader, num_ens=train_ens)
            valid_loss, valid_mse, valid_kl, valid_pred = validate_model(net, criterion, valid_loader, num_ens=valid_ens)

            kl_cost_train[epoch] = train_kl
            pred_cost_train[epoch] = train_pred
            mse_train[epoch] = train_mse

            kl_cost_val[epoch] = valid_kl
            pred_cost_val[epoch] = valid_pred
            mse_val[epoch] = valid_mse

            print('Epoch: {} \ttra loss: {:.4f} \ttra_kl: {:.4f} \ttra_pred: {:.4f} \ttra MSE: {:.4f} \nval loss: {:.4f} \tVal kl: {:.4f}  \tval_pred: {:.4f} \tval MSE: {:.4f} '
                .format(
                epoch, train_loss, train_kl, train_pred, train_mse, valid_loss, valid_kl, valid_pred, valid_mse))

            # save model if validation loss has decreased
            if valid_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(net.state_dict(), ckpt_name)
                valid_loss_min = valid_loss

        # figures: costs vs. epochs

        textsize = 15
        marker = 5

        fig, ax1 = plt.subplots(dpi=100)
        ax1.plot(pred_cost_train[20:], 'r--')
        ax1.plot(pred_cost_val[20:], 'b-')
        ax1.set_ylabel('Pred_loss')
        plt.xlabel('epoch')
        plt.grid(True, which='major', color='k', linestyle='-')
        plt.grid(True, which='minor', color='k', linestyle='--')
        lgd = plt.legend(['train error', 'validation error'], markerscale=marker, prop={'size': textsize, 'weight': 'normal'})
        ax = plt.gca()
        plt.title('Regression costs')
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                     ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(textsize)
            item.set_weight('normal')
        plt.savefig(ckpt_dir + '/pred_cost.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

        fig, ax1 = plt.subplots()
        ax1.plot(kl_cost_train, 'r')
        ax1.plot(kl_cost_val, 'b')
        ax1.set_ylabel('nats')
        plt.xlabel('epoch')
        plt.grid(True, which='major', color='k', linestyle='-')
        plt.grid(True, which='minor', color='k', linestyle='--')
        ax = plt.gca()
        plt.title('DKL (per sample)')
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                     ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(textsize)
            item.set_weight('normal')
        plt.savefig(ckpt_dir + '/KL_cost.png', bbox_inches='tight')

        fig2, ax2 = plt.subplots(dpi=100)
        ax2.set_ylabel('MSE')
        ax2.plot(mse_val[20:], 'b-')
        ax2.plot(mse_train[20:], 'r--')
        plt.xlabel('epoch')
        plt.grid(True, which='major', color='k', linestyle='-')
        plt.grid(True, which='minor', color='k', linestyle='--')
        ax2.get_yaxis().set_minor_formatter(matplotlib.ticker.ScalarFormatter())
        ax2.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
        lgd = plt.legend(['val mse', 'train mse'], markerscale=marker, prop={'size': textsize, 'weight': 'normal'})
        ax = plt.gca()
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                     ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(textsize)
            item.set_weight('normal')
        plt.savefig(ckpt_dir + '/mse.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

    # test saved model
    best_model = getModel(net_type, inputs, outputs).to(device)
    best_model.load_state_dict(torch.load(ckpt_name, map_location=device))
    test_loss, test_mse = test_model(best_model, criterion, test_loader, num_ens=test_ens)
    print('Test Loss: {:.4f} \tTest MSE: {:.4f} '.format(
            test_loss, test_mse))
    test_uncertainty(best_model, testset[:500], data='uci_har')
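
test_uncertainty is an external helper. A minimal sketch of the underlying idea — repeated stochastic forward passes through the Bayesian net to estimate a predictive mean and spread; the (output, kl) return shape and the sample count are assumptions:

import torch

def predict_with_uncertainty(model, x, n_samples=50):
    # Each forward pass through a Bayesian layer samples fresh weights,
    # so repeated passes approximate the predictive distribution.
    outputs = []
    with torch.no_grad():
        for _ in range(n_samples):
            out = model(x)
            if isinstance(out, tuple):  # some nets return (output, kl)
                out = out[0]
            outputs.append(out)
    outputs = torch.stack(outputs)
    return outputs.mean(dim=0), outputs.std(dim=0)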
Example #7
def run(dataset,
        net_type,
        checkpoint='None',
        prune_criterion='EmptyCrit',
        pruning_limit=0.0,
        lower_limit=0.5,
        local_pruning=False):
    # Hyperparameter settings
    layer_type = cfg.layer_type
    activation_type = cfg.activation_type
    priors = cfg.priors

    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size
    beta_type = cfg.beta_type

    # LOAD STRUCTURED PRUNED MODEL
    if net_type == 'customconv6':
        import pickle
        with open('/nfs/homedirs/ayle/model_conv6_0.5.pickle', 'rb') as f:
            pre_pruned_model = pickle.load(f)
    else:
        pre_pruned_model = None

    trainset, testset, inputs, outputs = data.getDataset(dataset)
    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs, priors, layer_type,
                   activation_type, pre_pruned_model).to(device)

    # LOAD PRUNED UNSTRUCTURED MASK
    # import pickle
    # with open('/nfs/homedirs/ayle/mask.pickle', 'rb') as f:
    #     mask = pickle.load(f)
    #
    # mask_keys = list(mask.keys())
    #
    # count = 0
    # for name, module in net.named_modules():
    #     if name.startswith('conv') or name.startswith('fc'):
    #         module.mask = mask[mask_keys[count]]
    #         count += 1
    #         print(module.mask.sum().float() / torch.numel(module.mask))

    ckpt_dir = f'checkpoints/{dataset}/bayesian'
    ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}_{activation_type}_{prune_criterion}_{pruning_limit}_after.pt'

    os.makedirs(ckpt_dir, exist_ok=True)

    if checkpoint != 'None':
        net.load_state_dict(torch.load(checkpoint))

    if layer_type == 'mgp':
        criterion = metrics.ELBO2(len(trainset)).to(device)
    else:
        criterion = metrics.ELBO(len(trainset)).to(device)
    optimizer = Adam(net.parameters(), lr=lr_start)
    lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer,
                                              patience=6,
                                              verbose=True)
    valid_loss_min = np.inf

    if prune_criterion == 'SNIPit':
        pruning_criterion = SNIPit(limit=pruning_limit,
                                   model=net,
                                   lower_limit=lower_limit)
        pruning_criterion.prune(pruning_limit,
                                train_loader=train_loader,
                                local=local_pruning)
    elif prune_criterion == 'SNR':
        pruning_criterion = SNR(limit=pruning_limit,
                                model=net,
                                lower_limit=lower_limit)
        pruning_criterion.prune(pruning_limit,
                                train_loader=train_loader,
                                local=local_pruning)
    elif prune_criterion == 'StructuredSNR':
        pruning_criterion = StructuredSNR(limit=pruning_limit,
                                          model=net,
                                          lower_limit=lower_limit)
        # pruning_criterion.prune(pruning_limit, train_loader=train_loader, local=local_pruning)

    init_num_params = sum(
        np.prod(x.shape) for name, x in net.named_parameters()
        if "W_mu" in name)
    new_num_params = init_num_params

    for epoch in range(n_epochs):  # loop over the dataset multiple times

        train_loss, train_acc, train_kl = train_model(net,
                                                      optimizer,
                                                      criterion,
                                                      train_loader,
                                                      num_ens=train_ens,
                                                      beta_type=beta_type,
                                                      epoch=epoch,
                                                      num_epochs=n_epochs,
                                                      layer_type=layer_type)
        valid_loss, valid_acc, _ = validate_model(net,
                                                  criterion,
                                                  valid_loader,
                                                  num_ens=valid_ens,
                                                  beta_type=beta_type,
                                                  epoch=epoch,
                                                  num_epochs=n_epochs,
                                                  layer_type=layer_type)
        lr_sched.step(valid_loss)

        print(
            'Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f} \ttrain_kl_div: {:.4f}'
            .format(epoch, train_loss, train_acc, valid_loss, valid_acc,
                    train_kl))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_min, valid_loss))
            torch.save(net.state_dict(), ckpt_name)
            valid_loss_min = valid_loss

        # if epoch == 0 or epoch == 1:
        # if (epoch % 40 == 0) and (epoch > 1) and (epoch < 200) and (1 - new_num_params / init_num_params) < pruning_limit:
        #     net.zero_grad()
        #     optimizer.zero_grad()
        #
        #     with torch.no_grad():
        #         pruning_criterion.prune(0.1, train_loader=train_loader, local=local_pruning)
        #
        #     import pickle
        #     with open('testt', 'wb') as f:
        #         pickle.dump(net, f)
        #
        #     with open('testt', 'rb') as f:
        #         net = pickle.load(f).to(device)
        #
        #     net.post_init_implementation()
        #     criterion = metrics.ELBO(len(trainset)).to(device)
        #     optimizer = Adam(net.parameters(), lr=lr_start)
        #     lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, verbose=True)
        #     valid_loss_max = np.Inf
        #     pruning_criterion = StructuredSNR(limit=pruning_limit, model=net, lower_limit=lower_limit)
        #
        #     new_num_params = sum([np.prod(x.shape) for name, x in net.named_parameters() if "W_mu" in name])
        #     print('Overall sparsity', 1 - new_num_params / init_num_params)

    # import pickle
    # with open(ckpt_name, 'wb') as f:
    #     pickle.dump(net, f)
    torch.save(net.state_dict(), ckpt_name)
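
SNR, SNIPit and StructuredSNR are external pruning criteria. A minimal sketch of the signal-to-noise idea behind SNR pruning — rank each weight by |mu|/sigma and mask out the lowest-scoring fraction — assuming the usual softplus parameterisation sigma = log(1 + exp(rho)) of Bayes-by-Backprop layers; the W_mu/W_rho attribute names follow the parameter names filtered above, and the rest is an illustrative assumption:

import torch
import torch.nn.functional as F

def snr_prune_masks(net, prune_fraction):
    # Score every Bayesian weight tensor by its signal-to-noise ratio |mu| / sigma.
    scores = []
    for name, module in net.named_modules():
        if hasattr(module, 'W_mu') and hasattr(module, 'W_rho'):
            sigma = F.softplus(module.W_rho.detach())  # sigma = log(1 + exp(rho))
            scores.append((name, module.W_mu.detach().abs() / sigma))
    # Global threshold: the prune_fraction quantile over all scores.
    flat = torch.cat([s.flatten() for _, s in scores])
    threshold = flat.sort().values[int(prune_fraction * (flat.numel() - 1))]
    # Keep (mask = 1) weights whose SNR exceeds the threshold.
    return {name: (s > threshold).float() for name, s in scores}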