num_workers=0, ) # For testing just do everything in one giant batch testloader = torch.utils.data.DataLoader( dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0, ) model = FeedForward(input_size=train_inputs.shape[1], output_size=train_outputs.shape[1]) if args.load_saved_model: model.load_state_dict(torch.load(args.load_saved_model), strict=False) # Open a tensorboard writer if a logging directory is given if args.logdir != '': current_time = datetime.now().strftime('%b%d_%H-%M-%S') save_dir = os.path.join(args.logdir, current_time) writer = SummaryWriter(log_dir=save_dir) if args.weight_histogram: # Log the initial parameters for name, param in model.named_parameters(): writer.add_histogram('parameters/' + name, param.clone().cpu().data.numpy(), 0) criterion = nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
def main():
    """Train and evaluate one independent single-output model per class.

    For every class in ``ALL_CLASSES`` a fresh ``FeedForward(num_classes=1)``
    model is built and trained for ``EPOCHS`` epochs with SGD and a step
    learning-rate decay.  After each epoch the validation loss is computed
    and ``save_checkpoint`` is told whether that epoch is the best so far.
    Once training for the class finishes, ``best.pt`` is reloaded from
    ``CHECKPOINT`` and scored with ``calc_avg_AUROC`` on the test loader.

    The AUROC of each task and the running average over tasks are printed
    as training proceeds, and the running averages are printed again as a
    summary at the end.
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(CHECKPOINT, exist_ok=True)

    print('==> Preparing dataset')
    trainloader, validloader, testloader = load_MNIST(
        batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):
        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        # Each task covers exactly one class.
        CLASSES = [cls]

        print("==> Creating model")
        model = FeedForward(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print(' Total params: %.2fK'
              % (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):
            # Step decay: scale the rate by LR_DROP every EPOCHS_DROP epochs.
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            # The training-pass return value is not used; only the
            # validation loss drives checkpoint selection.
            train(trainloader, model, criterion, CLASSES, CLASSES,
                  optimizer=optimizer, use_cuda=CUDA)
            test_loss = train(validloader, model, criterion, CLASSES, CLASSES,
                              test=True, use_cuda=CUDA)

            # Save the model and flag whether this epoch is the best so far.
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict(),
                },
                CHECKPOINT,
                is_best)

        print("==> Calculating AUROC")

        # NOTE(review): best.pt is presumably written by save_checkpoint when
        # is_best is true - confirm against its implementation.
        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        # Record the running average after this task, not the per-task value.
        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
def _decay_learning_rate(optimizer, learning_rate, epoch):
    """Apply the step decay for ``epoch`` and return the rate now in effect.

    Every ``EPOCHS_DROP`` epochs the rate is multiplied by ``LR_DROP`` and
    written into all of the optimizer's parameter groups; otherwise the rate
    is returned unchanged.
    """
    if (epoch + 1) % EPOCHS_DROP == 0:
        learning_rate *= LR_DROP
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    return learning_rate


def main():
    """Train one shared network on the classes of ``ALL_CLASSES`` in sequence.

    A single ``FeedForward(num_classes=len(ALL_CLASSES))`` model is created
    once and then trained task by task, one class per task:

    * First task: the whole network is trained with SGD and an
      ``L1Penalty``; afterwards the number of parameters whose magnitude is
      below ``ZERO_THRESHOLD`` is printed.
    * Later tasks: the model is deep-copied, all but the last two parameter
      tensors are frozen for a "selective retraining" pass, everything is
      unfrozen, ``select_neurons`` installs hooks, the network is trained
      again, the hooks are removed and ``split_neurons`` is called with the
      copy and the trained model.

    After every task ``best.pt`` is reloaded from ``CHECKPOINT`` and
    ``calc_avg_AUROC`` is evaluated over all classes seen so far.  The
    per-task AUROC values are printed again as a summary at the end.
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(CHECKPOINT, exist_ok=True)

    print('==> Preparing dataset')
    trainloader, validloader, testloader = load_MNIST(
        batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

    print("==> Creating model")
    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        cudnn.benchmark = True

    print(' Total params: %.2fK'
          % (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []  # classes seen so far; grows by one per task
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):
        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)
            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):
                learning_rate = _decay_learning_rate(
                    optimizer, learning_rate, epoch)

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader, model, criterion, ALL_CLASSES, [cls],
                      optimizer=optimizer, penalty=penalty, use_cuda=CUDA)
                test_loss = train(validloader, model, criterion,
                                  ALL_CLASSES, [cls],
                                  test=True, penalty=penalty, use_cuda=CUDA)

                # Save the model and flag whether this epoch is the best.
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()},
                                CHECKPOINT, is_best)

            # NOTE(review): the source's indentation was lost; this report is
            # placed after the epoch loop - confirm it was not meant to run
            # once per epoch.
            zero_count = sum(
                (abs(p.data.cpu().numpy()) < ZERO_THRESHOLD).sum()
                for p in model.parameters())
            print("Number of zero weights: %d" % zero_count)

        else:
            # Keep a snapshot of the network as it was before this task.
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            # Solve Eq.3
            # Freeze all layers except the last one (last 2 parameters).
            for param in list(model.parameters())[:-2]:
                param.requires_grad = False

            # Only hand the still-trainable parameters to the optimizer.
            optimizer = optim.SGD(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=LEARNING_RATE,
                momentum=MOMENTUM,
                weight_decay=WEIGHT_DECAY)
            penalty = L1Penalty(coeff=L1_COEFF)
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):
                learning_rate = _decay_learning_rate(
                    optimizer, learning_rate, epoch)

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                # No checkpoint is saved in this phase.
                train(trainloader, model, criterion, ALL_CLASSES, [cls],
                      optimizer=optimizer, penalty=penalty, use_cuda=CUDA)
                train(validloader, model, criterion, ALL_CLASSES, [cls],
                      test=True, penalty=penalty, use_cuda=CUDA)

            # Unfreeze everything again before the next phase.
            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)

            print("==> Training Selected Neurons")

            # NOTE(review): weight decay is hard-coded to 1e-4 here instead
            # of WEIGHT_DECAY - confirm this is intentional.
            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=1e-4)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):
                learning_rate = _decay_learning_rate(
                    optimizer, learning_rate, epoch)

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader, model, criterion, ALL_CLASSES, [cls],
                      optimizer=optimizer, use_cuda=CUDA)
                test_loss = train(validloader, model, criterion,
                                  ALL_CLASSES, [cls],
                                  test=True, use_cuda=CUDA)

                # Save the model and flag whether this epoch is the best.
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()},
                                CHECKPOINT, is_best)

            # Remove the hooks installed by select_neurons.
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")

        # NOTE(review): best.pt is presumably written by save_checkpoint when
        # is_best is true.  For t > 0 that checkpoint predates the
        # split_neurons call above - confirm reloading it here is intended.
        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
import sys import torch import torch.nn as nn from models import FeedForward from toy_dataset import ToyDataset, plot_data import matplotlib.pyplot as plt import numpy as np fname = sys.argv[1] n_samples = 10000 hidden_size = 512 model = FeedForward(input_size=2, hidden_size=hidden_size, output_size=4) model.load_state_dict(torch.load(fname), strict=True) model.eval() dataset_test = ToyDataset(n_samples) # For testing just do everything in one giant batch testloader = torch.utils.data.DataLoader( dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0, ) criterion = nn.CrossEntropyLoss() with torch.no_grad(): # Everything is in one batch, so this loop will only happen once
x_env, y_env = env.object_locations[sp_name][[0, 1]] # Need to scale to SSP coordinates # Env is 0 to 13, SSP is -5 to 5 x = ((x_env - 0) / coarse_size) * limit_range + xs[0] y = ((y_env - 0) / coarse_size) * limit_range + ys[0] item_memory += vocab[sp_name] * encode_point(x, y, x_axis_sp, y_axis_sp) item_memory.normalize() # Component functions of the full system cleanup_network = FeedForward(input_size=ssp_dim, hidden_size=512, output_size=ssp_dim) cleanup_network.load_state_dict(torch.load(args.cleanup_network), strict=True) cleanup_network.eval() # Input is x and y velocity plus the distance sensor measurements, plus map ID localization_network = LocalizationModel( input_size=2 + n_sensors + n_maps, unroll_length=1, #rollout_length, sp_dim=ssp_dim) localization_network.load_state_dict(torch.load(args.localization_network), strict=True) localization_network.eval() if args.n_hidden_layers_policy == 1: policy_network = FeedForward(input_size=id_size + ssp_dim * 2, output_size=2) else: