def main():
    ## Toy two-spirals experiment: Hessian spectrum of a masked net before and after training ##
    X, Y = twospirals(500, noise=1.3)
    train_x = torch.FloatTensor(X)
    train_y = torch.FloatTensor(Y).unsqueeze(-1)
    X, Y = twospirals(100, noise=1.3)
    test_x = torch.FloatTensor(X)
    test_y = torch.FloatTensor(Y).unsqueeze(-1)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(0)
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        train_x, train_y = train_x.cuda(), train_y.cuda()
        test_x, test_y = test_x.cuda(), test_y.cuda()

    loss_func = torch.nn.BCEWithLogitsLoss()
    lr = 0.01
    n_trials = 2
    n_iters = 1000
    losses = torch.zeros(n_trials, n_iters)
    test_losses = torch.zeros(n_trials, n_iters)
    init_eigs = []
    final_eigs = []
    pct_keep = 0.4
    optim = torch.optim.Adam

    for trial in range(n_trials):
        model = hess.nets.MaskedNet(train_x, train_y, bias=True, n_hidden=5,
                                    hidden_size=10, activation=torch.nn.ELU(),
                                    pct_keep=pct_keep)
        if use_cuda:
            model = model.cuda()
        mask, perm = hess.utils.mask_model(model, pct_keep, use_cuda)
        keepers = np.array(np.where(mask.cpu() == 1))[0]

        ## compute hessian pre-training ##
        initial_evals = utils.get_hessian_eigs(train_x, train_y, loss=loss_func,
                                               model=model, mask=mask,
                                               use_cuda=use_cuda, n_eigs=100)
        init_eigs.append(initial_evals)
        # hessian = utils.get_hessian(train_x, train_y, loss=loss_func,
        #                             model=model, use_cuda=use_cuda)
        # sub_hess = hessian[np.ix_(keepers, keepers)]
        # e_val, _ = np.linalg.eig(sub_hess.cpu().detach())
        # init_eigs.append(e_val.real)

        ## train ##
        optimizer = optim(model.parameters(), lr=lr)
        for step in range(n_iters):
            optimizer.zero_grad()
            outputs = model(train_x)
            loss = loss_func(outputs, train_y)
            losses[trial, step] = loss.item()
            # track test loss without building a graph
            with torch.no_grad():
                test_out = model(test_x)
                test_losses[trial, step] = loss_func(test_out, test_y).item()
            loss.backward()
            optimizer.step()

        ## compute final hessian ##
        hessian = utils.get_hessian(train_x, train_y, loss=loss_func,
                                    model=model, use_cuda=use_cuda)
        sub_hess = hessian[np.ix_(keepers, keepers)]
        e_val, _ = np.linalg.eig(sub_hess.cpu().detach())
        final_eigs.append(e_val.real)

        print("model ", trial, " done")

    fpath = "../saved-experiments/"
    fname = "losses.pt"
    torch.save(losses, fpath + fname)

    fname = "init_eigs.P"
    with open(fpath + fname, 'wb') as fp:
        pickle.dump(init_eigs, fp)

    fname = "final_eigs.P"
    with open(fpath + fname, 'wb') as fp:
        pickle.dump(final_eigs, fp)
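# Note: the toy scripts in this file call `twospirals`, which is defined elsewhere
# in the repo. Below is a minimal sketch of such a generator, assuming the standard
# construction (two interleaved noisy spirals with 0/1 labels); the actual helper
# may differ in scaling or label convention.
import numpy as np

def twospirals(n_points, noise=0.5):
    """Return (2*n_points, 2) inputs and (2*n_points,) binary labels."""
    theta = np.sqrt(np.random.rand(n_points, 1)) * 3 * np.pi   # angle along each spiral
    r = 2 * theta + np.pi                                       # radius grows with angle
    spiral_a = np.concatenate((r * np.cos(theta), r * np.sin(theta)), axis=1)
    spiral_b = -spiral_a                                        # second spiral: 180-degree rotation
    X = np.concatenate((spiral_a, spiral_b)) + noise * np.random.randn(2 * n_points, 2)
    Y = np.concatenate((np.zeros(n_points), np.ones(n_points)))
    return X, Y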
def main():
    torch.random.manual_seed(88)
    X, Y = twospirals(500, noise=1.3)
    train_x = torch.FloatTensor(X)
    train_y = torch.FloatTensor(Y).unsqueeze(-1)

    ###################################
    ## Set up nets and match weights ##
    ###################################
    n_hidden = 5
    width = 1024
    subnet_model = SubNetLinear(in_dim=2, out_dim=1, n_layers=n_hidden, k=width)
    masked_model = MaskedNetLinear(in_dim=2, out_dim=1, n_layers=n_hidden, k=width)

    hess.net_utils.set_model_prune_rate(subnet_model, 0.5)
    hess.net_utils.freeze_model_weights(subnet_model)

    # copy the (frozen) weights and current mask from the subnet into the masked net
    weights = net_utils.get_weights_from_subnet(subnet_model)
    net_utils.apply_weights(masked_model, weights)
    mask = net_utils.get_mask_from_subnet(subnet_model)
    net_utils.apply_mask(masked_model, mask)
    mask = utils.flatten(mask)
    print(mask)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('using cuda')
        torch.cuda.set_device(0)
        train_x, train_y = train_x.cuda(), train_y.cuda()
        subnet_model = subnet_model.cuda()
        masked_model = masked_model.cuda()

    ######################
    ## Train the Subnet ##
    ######################
    optimizer = torch.optim.Adam(subnet_model.parameters(), lr=0.001)
    loss_func = torch.nn.BCEWithLogitsLoss()

    n_eigs = 200
    n_iters = 1000
    eigs_out = []
    eig_steps = []
    losses = torch.zeros(n_iters)
    eigs_every = 5
    min_loss = 1.
    prev_computed_step = -eigs_every - 1

    for step in range(n_iters):
        optimizer.zero_grad()
        outputs = subnet_model(train_x)
        loss = loss_func(outputs, train_y)
        print(loss)
        losses[step] = loss.item()
        loss.backward()
        optimizer.step()

        # recompute the Hessian spectrum when the loss improves and at least
        # `eigs_every` steps have passed since the last computation
        if losses[step] < min_loss + 0.01:
            min_loss = losses[step]
            if step > prev_computed_step + eigs_every:
                mask = net_utils.get_mask_from_subnet(subnet_model)
                net_utils.apply_mask(masked_model, mask)
                mask = utils.flatten(mask)
                eigs = utils.get_hessian_eigs(loss_func, masked_model, mask=mask,
                                              n_eigs=n_eigs, train_x=train_x,
                                              train_y=train_y)
                eigs_out.append(eigs)
                eig_steps.append(step)
                prev_computed_step = step
                print("step ", step, " done")

        # if step >= 1:
        #     if losses[step] > losses[step-1] + 0.1:
        #         mask = net_utils.get_mask_from_subnet(subnet_model)
        #         net_utils.apply_mask(masked_model, mask)
        #         mask = utils.flatten(mask)
        #         eigs = utils.get_hessian_eigs(loss_func, masked_model, mask=mask,
        #                                       n_eigs=n_eigs, train_x=train_x,
        #                                       train_y=train_y)
        #         eigs_out.append(eigs)
        #         eig_steps.append(step)
        #         prev_computed_step = step
        #         print("step ", step, " done")

    fpath = "./saved-subnet-hessian_0116/"
    fname = "subnet_eigs.pkl"
    subnet_eigs = [ee.cpu() for ee in eigs_out]
    with open(fpath + fname, 'wb') as f:
        pickle.dump(subnet_eigs, f)

    fname = "eig_steps.pkl"
    with open(fpath + fname, 'wb') as f:
        pickle.dump(eig_steps, f)

    fname = "subnet_model.pt"
    torch.save(subnet_model.state_dict(), fpath + fname)

    fname = "masked_model.pt"
    torch.save(masked_model.state_dict(), fpath + fname)

    fname = "losses.pt"
    torch.save(losses, fpath + fname)
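# Hypothetical downstream usage, not part of the original script: reload the
# artifacts written above and plot the training loss next to the largest Hessian
# eigenvalue at each snapshot step. Paths and filenames match the save calls above.
import pickle
import torch
import matplotlib.pyplot as plt

fpath = "./saved-subnet-hessian_0116/"
with open(fpath + "subnet_eigs.pkl", "rb") as f:
    subnet_eigs = pickle.load(f)      # list of eigenvalue tensors, one per snapshot
with open(fpath + "eig_steps.pkl", "rb") as f:
    eig_steps = pickle.load(f)        # training steps at which the spectra were computed
losses = torch.load(fpath + "losses.pt")

top_eigs = [e.max().item() for e in subnet_eigs]
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(losses.numpy())
ax1.set_ylabel("train loss")
ax2.plot(eig_steps, top_eigs)
ax2.set_ylabel("largest eigenvalue")
ax2.set_xlabel("step")
plt.show()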
def main():
    args = parser()
    args.device = None
    if torch.cuda.is_available():
        args.device = torch.device("cuda")
        args.cuda = True
    else:
        args.device = torch.device("cpu")
        args.cuda = False

    # loss_func = torch.nn.BCEWithLogitsLoss()
    # lr = 0.01
    n_trials = 10
    # n_iters = 1000
    # losses = torch.zeros(n_trials, n_iters)
    init_eigs = []
    final_eigs = []
    # pct_keep = 0.4
    # optim = torch.optim.SGD

    print("Preparing base directory %s" % args.dir)
    os.makedirs(args.dir, exist_ok=True)

    for trial in range(n_trials):
        trial_dir = os.path.join(args.dir, 'trial_' + str(trial))
        print("Preparing directory %s" % trial_dir)
        os.makedirs(trial_dir, exist_ok=True)
        with open(os.path.join(trial_dir, "command.sh"), "w") as f:
            f.write(" ".join(sys.argv))
            f.write("\n")

        torch.backends.cudnn.benchmark = True
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

        print("Using model %s" % args.model)
        model_cfg = getattr(models, args.model)

        print("Loading dataset %s from %s" % (args.dataset, args.data_path))
        loaders, num_classes = data.loaders(
            args.dataset,
            args.data_path,
            args.batch_size,
            args.num_workers,
            model_cfg.transform_train,
            model_cfg.transform_test,
            use_validation=not args.use_test,
            split_classes=args.split_classes,
        )

        print("Preparing model")
        print(*model_cfg.args)
        model = model_cfg.base(*model_cfg.args, num_classes=num_classes,
                               **model_cfg.kwargs, use_masked=True)
        model.to(args.device)  # bad set to for now

        # move the masks onto the same device as the weights they mask
        for m in model.modules():
            if isinstance(m, hess.nets.MaskedConv2d) or isinstance(m, hess.nets.MaskedLinear):
                if m.mask is not None and m.weight is not None:
                    m.mask = m.mask.to(m.weight.device)
                if m.has_bias:
                    if m.bias_mask is not None and m.bias is not None:
                        m.bias_mask = m.bias_mask.to(m.bias.device)

        mask = hess.utils.get_mask(model)
        # mask, perm = hess.utils.mask_model(model, pct_keep, use_cuda)
        # keepers = np.array(np.where(mask.cpu() == 1))[0]
        criterion = torch.nn.functional.cross_entropy

        ## compute hessian pre-training ##
        initial_evals = utils.get_hessian_eigs(loss=criterion, model=model,
                                               mask=mask, use_cuda=args.cuda,
                                               n_eigs=100, loader=loaders['train'])
        init_eigs.append(initial_evals)

        ## train ##
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr_init,
                                    momentum=args.momentum, weight_decay=args.wd)
        for epoch in range(0, args.epochs):
            train_epoch(model, loaders, swag.losses.cross_entropy, optimizer,
                        epoch=epoch, end_epoch=args.epochs,
                        eval_freq=args.eval_freq, save_freq=args.save_freq,
                        output_dir=trial_dir, lr_init=args.lr_init)

        ## compute final hessian ##
        final_evals = utils.get_hessian_eigs(loss=criterion, model=model,
                                             use_cuda=args.cuda, n_eigs=100,
                                             mask=mask, loader=loaders['train'])
        # sub_hess = hessian[np.ix_(keepers, keepers)]
        # e_val, _ = np.linalg.eig(sub_hess.cpu().detach())
        # final_eigs.append(e_val.real)
        final_eigs.append(final_evals)

        print("model ", trial, " done")

        # fpath = "../saved-experiments/"
        # fname = "losses.pt"
        # torch.save(losses, fpath + fname)

        # save the (cumulative) eigenvalue lists into each trial directory
        with open(os.path.join(trial_dir, "init_eigs.P"), 'wb') as fp:
            pickle.dump(init_eigs, fp)
        with open(os.path.join(trial_dir, "final_eigs.P"), 'wb') as fp:
            pickle.dump(final_eigs, fp)
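# Hypothetical sketch of the `parser()` helper used above, inferred from the
# `args.*` fields the script reads; the actual flag names and defaults in the
# repo may differ.
import argparse

def parser():
    p = argparse.ArgumentParser(description="Masked-model Hessian experiment")
    p.add_argument("--dir", type=str, required=True, help="output directory")
    p.add_argument("--seed", type=int, default=1)
    p.add_argument("--model", type=str, required=True, help="model class name in models")
    p.add_argument("--dataset", type=str, default="CIFAR10")
    p.add_argument("--data_path", type=str, required=True)
    p.add_argument("--batch_size", type=int, default=128)
    p.add_argument("--num_workers", type=int, default=4)
    p.add_argument("--use_test", action="store_true")
    p.add_argument("--split_classes", type=int, default=None)
    p.add_argument("--lr_init", type=float, default=0.01)
    p.add_argument("--momentum", type=float, default=0.9)
    p.add_argument("--wd", type=float, default=1e-4)
    p.add_argument("--epochs", type=int, default=200)
    p.add_argument("--eval_freq", type=int, default=5)
    p.add_argument("--save_freq", type=int, default=25)
    return p.parse_args()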
trainset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

dataiter = iter(testloader)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

evals, evecs = get_hessian_eigs(loss=criterion, model=model, use_cuda=True,
                                n_eigs=200, loader=trainloader, evals=True)

fpath = "./"
fname = "cifar_evals_200.pt"
torch.save(evals, fpath + fname)
fname = "cifar_evecs_200.pt"
torch.save(evecs, fpath + fname)
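# The fragment above uses `transform` and `model` without defining them; they are
# presumably set earlier in that script. A minimal placeholder for `transform`,
# assuming the same normalization as the CIFAR-10 training script below; `model`
# should be whichever trained network the spectrum is computed for.
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])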
def main():
    use_cuda = torch.cuda.is_available()
    model = Net()
    criterion = torch.nn.CrossEntropyLoss()
    if use_cuda:
        model = model.cuda()

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=2)

    ## Super Trainer ##
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(30):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:  # print every 100 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

    fpath = "./outputs/"
    fname = "saved_model.pt"
    torch.save(model.state_dict(), fpath + fname)

    evals, evecs = get_hessian_eigs(loss=criterion, model=model,
                                    use_cuda=use_cuda, n_eigs=200,
                                    loader=trainloader, evals=True)
    print("positive evals = ", evals)

    ## clean these guys up ##
    # discard placeholder entries still equal to 1 before saving
    keep = np.where(evals.cpu() != 1)
    evals = evals[keep].squeeze()
    evecs = evecs[:, keep].squeeze()

    fname = "top_evecs.pt"
    torch.save(evecs, fpath + fname)
    fname = "top_evals.pt"
    torch.save(evals, fpath + fname)
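# Hypothetical follow-up, not in the original script: reload the saved spectrum
# and print a few summary statistics. Filenames match the save calls above.
import torch

fpath = "./outputs/"
evals = torch.load(fpath + "top_evals.pt")
evecs = torch.load(fpath + "top_evecs.pt")
print("number of recovered eigenvalues:", evals.numel())
print("largest eigenvalue:", float(evals.max()))
print("eigenvector matrix shape:", tuple(evecs.shape))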