num_workers=0,
)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=len(dataset_test),
    shuffle=False,
    num_workers=0,
)

model = FeedForward(input_size=train_inputs.shape[1],
                    output_size=train_outputs.shape[1])

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)
    if args.weight_histogram:
        # Log the initial parameters
        for name, param in model.named_parameters():
            writer.add_histogram('parameters/' + name,
                                 param.clone().cpu().data.numpy(), 0)

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=args.lr,
Example #2
def main():
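    # One-vs-rest continual-learning baseline (summary inferred from the code
    # below): for each class in ALL_CLASSES, a fresh single-output FeedForward
    # model is trained on MNIST with BCE loss, the best (lowest validation loss)
    # checkpoint is reloaded, and a running average AUROC is reported.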

    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    CLASSES = []
    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES = [cls]

        print("==> Creating model")
        model = FeedForward(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):

            # decay learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader,
                               model,
                               criterion,
                               CLASSES,
                               CLASSES,
                               optimizer=optimizer,
                               use_cuda=CUDA)
            test_loss = train(validloader,
                              model,
                              criterion,
                              CLASSES,
                              CLASSES,
                              test=True,
                              use_cuda=CUDA)

            # save model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict()
                }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
Example #3
File: den.py  Project: AuMgLi/LLDEN
def main():
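    # DEN-style lifelong learning (summary inferred from the code below): a single
    # multi-output FeedForward model is trained incrementally over ALL_CLASSES.
    # The first task is learned with an L1 sparsity penalty; each later task runs
    # selective retraining of the output layer, neuron selection, retraining of
    # the selected sub-network, and neuron splitting, with AUROC computed over the
    # classes seen so far after every task.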
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    print("==> Creating model")
    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        # model = nn.DataParallel(model)
        cudnn.benchmark = True

    # initialize parameters
    # for name, param in model.named_parameters():
    #     if 'bias' in name:
    #         param.data.zero_()
    #     elif 'weight' in name:
    #         param.data.normal_(0, 0.005)

    print('    Total params: %.2fK' %
          (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE
            # epochs = 10
            for epoch in range(MAX_EPOCHS):
                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   penalty=penalty,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  penalty=penalty,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

                suma = 0
                for p in model.parameters():
                    p = p.data.cpu().numpy()
                    suma += (abs(p) < ZERO_THRESHOLD).sum()
                print("Number of zero weights: %d" % suma)
        else:  # if t != 0
            # copy model
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            # Solve Eq. 3 of the DEN paper (selective retraining): train only the
            # sparsely regularized output layer so its non-zero weights identify
            # the existing neurons relevant to the new task

            # freeze all layers except the last one (last 2 parameters)
            params = list(model.parameters())
            for param in params[:-2]:
                param.requires_grad = False

            optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      optimizer=optimizer,
                      penalty=penalty,
                      use_cuda=CUDA)
                train(validloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      test=True,
                      penalty=penalty,
                      use_cuda=CUDA)

            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)
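            # (assumption: the returned hooks restrict gradient updates to the
            # selected sub-network during the training phase below; they are
            # removed once that phase finishes)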

            print("==> Training Selected Neurons")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=1e-4)

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

            # remove hooks
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")
        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
Example #4
import sys
import torch
import torch.nn as nn
from models import FeedForward
from toy_dataset import ToyDataset, plot_data
import matplotlib.pyplot as plt
import numpy as np

fname = sys.argv[1]

n_samples = 10000
hidden_size = 512

model = FeedForward(input_size=2, hidden_size=hidden_size, output_size=4)
model.load_state_dict(torch.load(fname), strict=True)
model.eval()

dataset_test = ToyDataset(n_samples)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=len(dataset_test),
    shuffle=False,
    num_workers=0,
)

criterion = nn.CrossEntropyLoss()

with torch.no_grad():
    # Everything is in one batch, so this loop will only happen once
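    # A minimal sketch of the single-batch evaluation this sets up; the loader
    # unpacking, variable names, and accuracy computation below are assumptions
    # rather than part of the original script.
    for data, labels in testloader:
        outputs = model(data)
        loss = criterion(outputs, labels)
        predictions = outputs.argmax(dim=1)
        accuracy = (predictions == labels).float().mean().item()
        print('Test loss: {:.4f}  Accuracy: {:.4f}'.format(loss.item(), accuracy))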
Example #5
    x_env, y_env = env.object_locations[sp_name][[0, 1]]

    # Need to scale to SSP coordinates
    # Env is 0 to 13, SSP is -5 to 5
    x = ((x_env - 0) / coarse_size) * limit_range + xs[0]
    y = ((y_env - 0) / coarse_size) * limit_range + ys[0]

    item_memory += vocab[sp_name] * encode_point(x, y, x_axis_sp, y_axis_sp)
item_memory.normalize()

# Component functions of the full system

cleanup_network = FeedForward(input_size=ssp_dim,
                              hidden_size=512,
                              output_size=ssp_dim)
cleanup_network.load_state_dict(torch.load(args.cleanup_network), strict=True)
cleanup_network.eval()

# Input is x and y velocity plus the distance sensor measurements, plus map ID
localization_network = LocalizationModel(
    input_size=2 + n_sensors + n_maps,
    unroll_length=1,  # rollout_length
    sp_dim=ssp_dim)
localization_network.load_state_dict(torch.load(args.localization_network),
                                     strict=True)
localization_network.eval()

if args.n_hidden_layers_policy == 1:
    policy_network = FeedForward(input_size=id_size + ssp_dim * 2,
                                 output_size=2)
else: