    )

    model = FeedForward(input_size=train_inputs.shape[1],
                        output_size=train_outputs.shape[1])

    if args.load_saved_model:
        model.load_state_dict(torch.load(args.load_saved_model), strict=False)

    # Open a tensorboard writer if a logging directory is given
    if args.logdir != '':
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        save_dir = os.path.join(args.logdir, current_time)
        writer = SummaryWriter(log_dir=save_dir)
        if args.weight_histogram:
            # Log the initial parameters
            for name, param in model.named_parameters():
                writer.add_histogram('parameters/' + name,
                                     param.clone().cpu().data.numpy(), 0)

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum)

    for e in range(args.epochs):
        print('Epoch: {0}'.format(e + 1))

        avg_loss = 0
        n_batches = 0
        for i, data in enumerate(trainloader):
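            # Minimal sketch of the remaining MSE training step (assumption: each
            # batch is an (inputs, targets) pair); not the project's confirmed
            # implementation.
            inputs, targets = data

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            avg_loss += loss.item()
            n_batches += 1

            loss.backward()
            optimizer.step()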
import argparse
import json
import os.path as osp
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

# FeedForward and ToyDataset come from this project's own modules (import path
# not shown here); a minimal sketch of both follows after this script.


def main():
    parser = argparse.ArgumentParser(
        'Train a simple classifier on a toy dataset')
    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--train-fraction', type=float, default=.5,
                        help='proportion of the dataset to use for training')
    parser.add_argument('--n-samples', type=int, default=10000)
    parser.add_argument('--hidden-size', type=int, default=512,
                        help='Hidden size of the cleanup network')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--seed', type=int, default=13)
    parser.add_argument('--logdir', type=str,
                        default='trained_models/simple_classifier',
                        help='Directory for saved model and tensorboard log')
    parser.add_argument('--load-model', type=str, default='',
                        help='Optional model to continue training from')
    parser.add_argument('--name', type=str, default='',
                        help='Name of output folder within logdir. '
                             'Will use current date and time if blank')
    parser.add_argument('--weight-histogram', action='store_true',
                        help='Save histograms of the weights if set')

    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    rng = np.random.RandomState(seed=args.seed)

    dataset_train = ToyDataset(args.n_samples)
    dataset_test = ToyDataset(args.n_samples)

    trainloader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=0,
    )

    # For testing just do everything in one giant batch
    testloader = torch.utils.data.DataLoader(
        dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0,
    )

    model = FeedForward(input_size=2, hidden_size=args.hidden_size, output_size=4)

    if args.load_model != '':
        # Optionally continue training from a saved model
        model.load_state_dict(torch.load(args.load_model))

    # Open a tensorboard writer if a logging directory is given
    if args.logdir != '':
        if args.name == '':
            # Default the output folder name to the current date and time
            current_time = datetime.now().strftime('%b%d_%H-%M-%S')
            save_dir = osp.join(args.logdir, current_time)
        else:
            save_dir = osp.join(args.logdir, args.name)
        writer = SummaryWriter(log_dir=save_dir)
        if args.weight_histogram:
            # Log the initial parameters
            for name, param in model.named_parameters():
                writer.add_histogram('parameters/' + name,
                                     param.clone().cpu().data.numpy(), 0)

    criterion = nn.CrossEntropyLoss()
    # criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum)

    for e in range(args.epochs):
        print('Epoch: {0}'.format(e + 1))

        avg_loss = 0
        n_batches = 0
        for i, data in enumerate(trainloader):
            locations, labels = data

            if locations.size()[0] != args.batch_size:
                continue  # Drop data, not enough for a batch

            optimizer.zero_grad()

            # outputs = torch.max(model(locations), 1)[1].unsqueeze(1)
            outputs = model(locations)

            loss = criterion(outputs, labels)
            avg_loss += loss.data.item()
            n_batches += 1
            loss.backward()
            # print(loss.data.item())
            optimizer.step()

        print(avg_loss / n_batches)

        if args.logdir != '':
            if n_batches > 0:
                avg_loss /= n_batches
                writer.add_scalar('avg_loss', avg_loss, e + 1)

            if args.weight_histogram and (e + 1) % 10 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram('parameters/' + name,
                                         param.clone().cpu().data.numpy(), e + 1)

    print("Testing")
    with torch.no_grad():
        # Everything is in one batch, so this loop will only happen once
        for i, data in enumerate(testloader):
            locations, labels = data
            outputs = model(locations)
            loss = criterion(outputs, labels)
            print(loss.data.item())

            if args.logdir != '':
                # TODO: get a visualization of the performance
                writer.add_scalar('test_loss', loss.data.item())

    # Close tensorboard writer and save the trained model and parameters
    if args.logdir != '':
        writer.close()

        torch.save(model.state_dict(), osp.join(save_dir, 'model.pt'))

        params = vars(args)
        with open(osp.join(save_dir, "params.json"), "w") as f:
            json.dump(params, f)
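

# Hypothetical sketch (not part of the original script): the classifier script
# above imports FeedForward and ToyDataset from the project's own modules, which
# are not shown in this section. The classes below are inferred only from how
# they are called (2-D inputs, four output classes, one (location, label) pair
# per sample); the real definitions may differ.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset


class FeedForward(nn.Module):
    """Single-hidden-layer MLP; hidden_size matches the --hidden-size flag."""

    def __init__(self, input_size=2, hidden_size=512, output_size=4):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # CrossEntropyLoss in the training loop expects raw logits, so no softmax here
        return self.fc2(F.relu(self.fc1(x)))


class ToyDataset(Dataset):
    """Toy dataset sketch: random 2-D points labelled by quadrant (assumed task)."""

    def __init__(self, n_samples):
        self.locations = np.random.uniform(-1, 1, size=(n_samples, 2)).astype(np.float32)
        # Quadrant index in {0, 1, 2, 3} used as the class label
        self.labels = ((self.locations[:, 0] > 0) * 2
                       + (self.locations[:, 1] > 0)).astype(np.int64)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return torch.from_numpy(self.locations[idx]), int(self.labels[idx])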
import copy
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

# The constants (CHECKPOINT, BATCH_SIZE, NUM_WORKERS, ALL_CLASSES, CUDA,
# LEARNING_RATE, MOMENTUM, WEIGHT_DECAY, L1_COEFF, MAX_EPOCHS, EPOCHS_DROP,
# LR_DROP, ZERO_THRESHOLD) and the helpers (load_MNIST, FeedForward, train,
# l1_penalty, save_checkpoint, select_neurons, split_neurons, calc_avg_AUROC)
# come from this project's own config and utility modules, not shown here.


def main():
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    print("==> Creating model")

    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        model = nn.DataParallel(model)
        cudnn.benchmark = True

    # initialize parameters
    for name, param in model.named_parameters():
        if 'bias' in name:
            param.data.zero_()
        elif 'weight' in name:
            param.data.normal_(0, 0.005)

    print('    Total params: %.2fK' % (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = l1_penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE
            # epochs = 10

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader, model, criterion, ALL_CLASSES, [cls],
                                   optimizer=optimizer, penalty=penalty, use_cuda=CUDA)
                test_loss = train(validloader, model, criterion, ALL_CLASSES, [cls],
                                  test=True, penalty=penalty, use_cuda=CUDA)

                # save the model whenever the validation loss improves
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT, is_best)

            suma = 0
            for p in model.parameters():
                p = p.data.cpu().numpy()
                suma += (abs(p) < ZERO_THRESHOLD).sum()
            print("Number of zero weights: %d" % suma)

        else:
            # copy model
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            ## Solve Eq.3

            # freeze all layers except the last one (last 2 parameters)
            params = list(model.parameters())
            for param in params[:-2]:
                param.requires_grad = False

            optimizer = optim.SGD(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=LEARNING_RATE,
                momentum=MOMENTUM,
                weight_decay=WEIGHT_DECAY
            )

            penalty = l1_penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader, model, criterion, ALL_CLASSES, [cls],
                      optimizer=optimizer, penalty=penalty, use_cuda=CUDA)
                train(validloader, model, criterion, ALL_CLASSES, [cls],
                      test=True, penalty=penalty, use_cuda=CUDA)

            # unfreeze everything again before selecting neurons to retrain
            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)

            print("==> Training Selected Neurons")

            optimizer = optim.SGD(
                model.parameters(),
                lr=LEARNING_RATE,
                momentum=MOMENTUM,
                weight_decay=1e-4
            )

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader, model, criterion, ALL_CLASSES, [cls],
                                   optimizer=optimizer, use_cuda=CUDA)
                test_loss = train(validloader, model, criterion, ALL_CLASSES, [cls],
                                  test=True, use_cuda=CUDA)

                # save the model whenever the validation loss improves
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT, is_best)

            # remove hooks
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
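

# Hypothetical sketches (not part of the original file): two of the project-local
# helpers used above, l1_penalty and save_checkpoint, are sketched below based
# only on their call sites (penalty = l1_penalty(coeff=...),
# save_checkpoint({'state_dict': ...}, CHECKPOINT, is_best), and the best.pt file
# loaded before computing AUROC). The real implementations may differ.
import os
import shutil

import torch


class l1_penalty(object):
    """Sketch: L1 regularization term added to the training loss."""

    def __init__(self, coeff):
        self.coeff = coeff

    def __call__(self, model):
        # Sum of absolute parameter values, scaled by the coefficient
        return self.coeff * sum(p.abs().sum() for p in model.parameters())


def save_checkpoint(state, checkpoint_dir, is_best, filename='checkpoint.pt'):
    """Sketch: always save the latest state; copy it to best.pt when it improves."""
    filepath = os.path.join(checkpoint_dir, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint_dir, 'best.pt'))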