def main(args):
    trainloader, testloader = build_dataset(
        "cifar10",
        dataroot=args.dataroot,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        num_workers=2,
    )
    if args.fname:
        print("Loading model from %s" % args.fname)
        model = torch.load(args.fname, map_location="cpu").cuda()
    else:
        model = build_model("ResNet18", num_classes=10)
    criterion = torch.nn.CrossEntropyLoss()
    eigenvals, eigenvecs = compute_hessian_eigenthings(
        model,
        testloader,
        criterion,
        args.num_eigenthings,
        mode=args.mode,
        # power_iter_steps=args.num_steps,
        max_samples=args.max_samples,
        # momentum=args.momentum,
        full_dataset=args.full_dataset,
        use_gpu=args.cuda,
    )
    print("Eigenvecs:")
    print(eigenvecs)
    print("Eigenvals:")
    print(eigenvals)
def calculate_low_rank_tic(args, loader, model, rank=1):
    criterion = torch.nn.functional.nll_loss
    num_eigenthings = rank  # compute the top-`rank` eigenvalues/eigenvectors
    eigenvals, eigenvecs = compute_hessian_eigenthings(
        model, loader, criterion, num_eigenthings,
        use_gpu=torch.cuda.is_available())
    low_rank_tic = 0
    n_examples = 0
    for i, (x, y) in enumerate(loader):
        x, y = x.to(args.device), y.to(args.device)
        for t in range(x.size(0)):
            # per-example gradient of the loss w.r.t. all parameters
            y_pred = model(x[t].unsqueeze(0))
            loss = F.nll_loss(y_pred, y[t].unsqueeze(0), reduction='sum')
            grads = torch.autograd.grad(loss, model.parameters())
            grads = torch.cat([g.reshape(-1) for g in grads]).cpu().numpy()
            # low-rank TIC term: sum_k (v_k . grad)^2 / lambda_k
            low_rank_tic += (1 / eigenvals) @ (eigenvecs @ grads) ** 2
        n_examples += x.size(0)
        if n_examples > args.estim_size:
            break
    return low_rank_tic / n_examples
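# A toy numpy check of the low-rank TIC term above (illustrative sketch, not
# part of the original code): with the *full* eigenbasis, the quantity
# (1 / eigenvals) @ (eigenvecs @ g)**2 recovers g^T H^{-1} g exactly; with
# only the top-k eigenpairs it is the low-rank approximation used here.
import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 5))
H = A @ A.T + 5 * np.eye(5)   # symmetric positive-definite stand-in Hessian
g = rng.normal(size=5)        # stand-in for a per-example gradient

lam, V = np.linalg.eigh(H)    # eigh returns eigenvectors as columns, so the
proj = V.T @ g                # rows of V.T play the role of `eigenvecs`
assert np.allclose((1 / lam) @ proj ** 2, g @ np.linalg.solve(H, g))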
def process_dataset(self, dataset):
    """
    Summarizes information about the training data in terms of the curvature
    of the loss in weight space. Here we save only the top eigenvectors of
    the Hessian.
    """
    # MVP_op = get_hvp_op(self.model, dataset, criterion, use_gpu=self.gpu,
    #                     n_samples=32)
    # eigvals, eigvecs = lanczos_eigenvecs(MVP_op, N=self.n_params,
    #                                      num_eigs=self.num_eigs)
    self.model.eval()
    self.mean.data = self._get_param_vec().detach().cpu()
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=self.batch_size, shuffle=True)
    eigs, eigvecs = compute_hessian_eigenthings(
        self.model,
        dataloader,
        lambda x, y: self.dist_fam.loss(x, y).mean(),
        self.num_eigs,
        mode='power_iter',
        full_dataset=self.config['full_data'],
        max_samples=self.max_samples,
        use_gpu=self.gpu,
        momentum=0.9,
        power_iter_steps=20)
    # eigenvectors come back as rows (num_eigs x num_params); store as columns
    eigvecs = eigvecs.T
    self.eig_weights.data = torch.from_numpy(eigs.copy()).float()
    self.top_eigs.data = torch.from_numpy(eigvecs.copy()).float()
    self.configured.data = torch.ones(1, dtype=torch.bool)
def test_stochastic_hessian(model, criterion, real_hessian, x, y, bs=10,
                            ntrials=10):
    samples = [(x_i, y_i) for x_i, y_i in zip(x, y)]  # full dataset
    dataloader = DataLoader(samples, batch_size=bs)

    eigenvals = []
    eigenvecs = []
    nparams = len(real_hessian)
    for _ in range(ntrials):
        est_eigenvals, est_eigenvecs = compute_hessian_eigenthings(
            model,
            dataloader,
            criterion,
            num_eigenthings=nparams,
            power_iter_steps=10,
            power_iter_err_threshold=1e-5,
            momentum=0,
            use_gpu=False,
        )
        est_eigenvals = np.array(est_eigenvals)
        est_eigenvecs = np.array([t.numpy() for t in est_eigenvecs])
        # sort estimates in descending eigenvalue order
        est_inds = np.argsort(est_eigenvals)
        est_eigenvals = est_eigenvals[est_inds][::-1]
        est_eigenvecs = est_eigenvecs[est_inds][::-1]
        eigenvals.append(est_eigenvals)
        eigenvecs.append(est_eigenvecs)
    eigenvals = np.array(eigenvals)
    eigenvecs = np.array(eigenvecs)

    real_eigenvals, real_eigenvecs = np.linalg.eig(real_hessian)
    real_inds = np.argsort(real_eigenvals)
    real_eigenvals = np.array(real_eigenvals)[real_inds][::-1]
    # eig returns eigenvectors as columns; transpose so rows are eigenvectors
    real_eigenvecs = real_eigenvecs.T[real_inds][::-1]

    # Plot eigenvalue error
    plt.suptitle("Stochastic Hessian eigendecomposition errors: %d trials"
                 % ntrials)
    plt.subplot(1, 2, 1)
    plt.title("Eigenvalues")
    plt.plot(list(range(nparams)), real_eigenvals, label="True Eigenvals")
    plot_eigenval_estimates(eigenvals, label="Estimates")
    plt.legend()

    # Plot eigenvector cosine similarity
    plt.subplot(1, 2, 2)
    plt.title("Eigenvector cosine similarity")
    plot_eigenvec_errors(real_eigenvecs, eigenvecs, label="Estimates")
    plt.legend()
    plt.savefig("stochastic.png")
    plt.clf()
def test_full_hessian(model, criterion, x, y, ntrials=10):
    loss = criterion(model(x), y)
    loss_grad = torch.autograd.grad(loss, model.parameters(), create_graph=True)
    real_hessian = get_full_hessian(loss_grad, model)

    samples = [(x_i, y_i) for x_i, y_i in zip(x, y)]  # full dataset
    dataloader = DataLoader(samples, batch_size=len(x))

    eigenvals = []
    eigenvecs = []
    nparams = len(real_hessian)
    for _ in range(ntrials):
        est_eigenvals, est_eigenvecs = compute_hessian_eigenthings(
            model,
            dataloader,
            criterion,
            num_eigenthings=nparams,
            power_iter_steps=10,
            power_iter_err_threshold=1e-5,
            momentum=0,
            use_gpu=False,
        )
        # sort estimates in descending eigenvalue order
        est_inds = np.argsort(est_eigenvals)
        est_eigenvals = np.array(est_eigenvals)[est_inds][::-1]
        est_eigenvecs = np.array(est_eigenvecs)[est_inds][::-1]
        eigenvals.append(est_eigenvals)
        eigenvecs.append(est_eigenvecs)
    eigenvals = np.array(eigenvals)
    eigenvecs = np.array(eigenvecs)

    real_eigenvals, real_eigenvecs = np.linalg.eig(real_hessian)
    real_inds = np.argsort(real_eigenvals)
    real_eigenvals = np.array(real_eigenvals)[real_inds][::-1]
    # eig returns eigenvectors as columns; transpose so rows are eigenvectors
    real_eigenvecs = real_eigenvecs.T[real_inds][::-1]

    # Plot eigenvalue error
    plt.suptitle("Hessian eigendecomposition errors: %d trials" % ntrials)
    plt.subplot(1, 2, 1)
    plt.title("Eigenvalues")
    plt.plot(list(range(nparams)), real_eigenvals, label="True Eigenvals")
    plot_eigenval_estimates(eigenvals, label="Estimates")
    plt.legend()

    # Plot eigenvector cosine similarity
    plt.subplot(1, 2, 2)
    plt.title("Eigenvector cosine similarity")
    plot_eigenvec_errors(real_eigenvecs, eigenvecs, label="Estimates")
    plt.legend()
    plt.savefig("full.png")
    plt.clf()
    return real_hessian
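# The tests above call a helper `get_full_hessian` to materialize the exact
# Hessian for comparison. A minimal sketch of such a helper (an assumption
# about its behavior, not the original implementation): it double-backprops
# through the flattened gradient, one Hessian row per autograd call.
import numpy as np
import torch

def get_full_hessian(loss_grad, model):
    params = list(model.parameters())
    flat_grad = torch.cat([g.reshape(-1) for g in loss_grad])
    n = flat_grad.numel()
    hessian = torch.zeros(n, n)
    for i in range(n):
        # the gradient of the i-th gradient entry w.r.t. the parameters is the
        # i-th Hessian row (requires create_graph=True on the first backward)
        row = torch.autograd.grad(flat_grad[i], params, retain_graph=True)
        hessian[i] = torch.cat([r.reshape(-1) for r in row])
    return hessian.detach().numpy()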
def test_fixed_mini(model, criterion, real_hessian, x, y, bs=10, ntrials=10):
    x = x[:bs]
    y = y[:bs]
    samples = [(x_i, y_i) for x_i, y_i in zip(x, y)]  # fixed mini-batch
    dataloader = DataLoader(samples, batch_size=len(x))

    eigenvals = []
    eigenvecs = []
    nparams = len(real_hessian)
    for _ in range(ntrials):
        est_eigenvals, est_eigenvecs = compute_hessian_eigenthings(
            model,
            dataloader,
            criterion,
            num_eigenthings=nparams,
            mode='lanczos',
            power_iter_steps=10,
            power_iter_err_threshold=1e-5,
            momentum=0,
            use_gpu=False)
        est_eigenvals = np.array(est_eigenvals)
        est_eigenvecs = np.array([t.numpy() for t in est_eigenvecs])
        # sort estimates in descending eigenvalue order
        est_inds = np.argsort(est_eigenvals)
        est_eigenvals = est_eigenvals[est_inds][::-1]
        est_eigenvecs = est_eigenvecs[est_inds][::-1]
        eigenvals.append(est_eigenvals)
        eigenvecs.append(est_eigenvecs)
    eigenvals = np.array(eigenvals)
    eigenvecs = np.array(eigenvecs)

    real_eigenvals, real_eigenvecs = np.linalg.eig(real_hessian)
    real_inds = np.argsort(real_eigenvals)
    real_eigenvals = np.array(real_eigenvals)[real_inds][::-1]
    # eig returns eigenvectors as columns; transpose so rows are eigenvectors
    real_eigenvecs = real_eigenvecs.T[real_inds][::-1]

    # Plot eigenvalue error
    plt.suptitle('Fixed mini-batch Hessian eigendecomposition errors: %d trials'
                 % ntrials)
    plt.subplot(1, 2, 1)
    plt.title('Eigenvalues')
    plt.plot(list(range(nparams)), real_eigenvals, label='True Eigenvals')
    plot_eigenval_estimates(eigenvals, label='Estimates')
    plt.legend()

    # Plot eigenvector cosine similarity
    plt.subplot(1, 2, 2)
    plt.title('Eigenvector cosine similarity')
    plot_eigenvec_errors(real_eigenvecs, eigenvecs, label='Estimates')
    plt.legend()
    plt.savefig('fixed.png')
def compute(self, message):
    if message.kind == 'activations':
        module = message.key.module
        for p in module.parameters():
            self._parameters.add(p)
    else:
        n_batches = self.subscriptions['batch_finished'].counter['batch_finished']
        if n_batches % self._frequency == 0:
            evals, evecs = compute_hessian_eigenthings(
                self._forward_fn, self._parameters, self._dataloader,
                self._loss_fn, power_iter_steps=self._power_steps,
                num_eigenthings=self._num_eig)
            # log eigenvalues in ascending order
            for i, val in enumerate(sorted(evals)):
                self.backend.add_data(f'Eigenvalue {i}', val,
                                      message.global_step)
def test_principal_eigenvec(model, criterion, x, y, ntrials, fp16):
    loss = criterion(model(x), y)
    loss_grad = torch.autograd.grad(loss, model.parameters(), create_graph=True)
    print("computing real hessian")
    real_hessian = get_full_hessian(loss_grad, model)
    # real_hessian += 1e-4 * np.eye(len(real_hessian))

    samples = [(x_i, y_i) for x_i, y_i in zip(x, y)]  # full dataset
    dataloader = DataLoader(samples, batch_size=len(x))

    print("computing numpy principal eigenvec of hessian")
    num_params = len(real_hessian)
    real_eigenvals, real_eigenvecs = scipy.linalg.eigh(
        real_hessian, eigvals=(num_params - 1, num_params - 1)
    )
    # eigh returns eigenvectors as columns
    real_eigenval, real_eigenvec = real_eigenvals[0], real_eigenvecs[:, 0]

    est_eigenvals, est_eigenvecs = compute_hessian_eigenthings(
        model,
        dataloader,
        criterion,
        num_eigenthings=1,
        power_iter_steps=10,
        power_iter_err_threshold=1e-5,
        momentum=0,
        use_gpu=False,
        fp16=fp16,
    )
    est_eigenval, est_eigenvec = est_eigenvals[0], est_eigenvecs[0]

    # compute cosine similarity between the true and estimated eigenvectors
    print(real_eigenvec, est_eigenvec)
    dotted = np.dot(real_eigenvec, est_eigenvec)
    if dotted == 0.0:
        score = 1.0  # both in nullspace... nice...
    else:
        norm = scipy.linalg.norm(real_eigenvec) * scipy.linalg.norm(est_eigenvec)
        score = abs(dotted / norm)
    print(score)
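# Note: newer SciPy releases replace the `eigvals` keyword used above with
# `subset_by_index`; an equivalent call under that API would be:
#   real_eigenvals, real_eigenvecs = scipy.linalg.eigh(
#       real_hessian, subset_by_index=(num_params - 1, num_params - 1))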
def log_hessian(model, loader, time, task_id):
    criterion = torch.nn.CrossEntropyLoss().to(DEVICE)
    use_gpu = DEVICE != 'cpu'
    est_eigenvals, est_eigenvecs = compute_hessian_eigenthings(
        model,
        loader,
        criterion,
        num_eigenthings=NUM_EIGENS,
        power_iter_steps=15,
        power_iter_err_threshold=1e-5,
        momentum=0,
        use_gpu=use_gpu,
    )
    key = 'task-{}-epoch-{}'.format(task_id, time - 1)
    hessian_eig_db[key] = est_eigenvals
    save_eigenvec(EXPERIMENT_DIRECTORY + "/{}-vec.npy".format(key),
                  est_eigenvecs)
    experiment.log_histogram_3d(name='task-{}-eigs'.format(task_id),
                                step=time - 1, values=est_eigenvals)
def eigen_approx_hessian(model, batches, args):
    '''
    Eigen-approximation to the Hessian.
    https://github.com/noahgolmant/pytorch-hessian-eigenthings

    To compute the Hessian on a single minibatch, we construct a dataloader
    that provides only that batch.
    '''
    loader = dataloader.get_subset_batch_loader(batches, args)
    eigenvals, eigenvecs = compute_hessian_eigenthings(
        model,
        loader,
        args['loss_func'],
        num_eigenthings=args['num_eigens_hessian_approx'],
        use_gpu=args['use_cuda'],
        full_dataset=True)
    eigenvals = np.expand_dims(eigenvals, -1)
    return torch.Tensor(eigenvals.copy()), torch.Tensor(eigenvecs.copy())
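# `dataloader.get_subset_batch_loader` is not shown here; a minimal stand-in
# (an assumption about its behavior, not the original implementation) only
# needs to yield the given (x, y) batches, since compute_hessian_eigenthings
# simply iterates over its dataloader argument:
import torch

def get_subset_batch_loader(batches, args):
    xs = torch.cat([x for x, _ in batches])
    ys = torch.cat([y for _, y in batches])
    dataset = torch.utils.data.TensorDataset(xs, ys)
    # the 'batch_size' key is hypothetical; default to a single full batch
    return torch.utils.data.DataLoader(
        dataset, batch_size=args.get('batch_size', len(xs)), shuffle=False)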
def main(args):
    trainloader, testloader = build_dataset(
        'cifar10',
        dataroot=args.dataroot,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        num_workers=2)
    model = build_model('ResNet18', num_classes=10)
    criterion = torch.nn.CrossEntropyLoss()
    eigenvals, eigenvecs = compute_hessian_eigenthings(
        model, testloader, criterion, args.num_eigenthings,
        args.num_steps, momentum=args.momentum, use_gpu=args.cuda)
    print("Eigenvecs:")
    print(eigenvecs)
    print("Eigenvals:")
    print(eigenvals)
    track.metric(iteration=0, eigenvals=eigenvals)
def get_models_eig(models, train_loader, test_loader, loss, num_eigenthings=5,
                   full_dataset=True, device=None, only_vals=True):
    eig_dict = {}
    # get the top Hessian eigenvalues for each model
    for k, m in models.items():
        print(k)
        if device is not None:
            m = m.to(device)
            is_gpu = True
        else:
            is_gpu = False
        try:
            eigenvals, eigenvecs = compute_hessian_eigenthings(
                m, train_loader, loss, num_eigenthings, use_gpu=is_gpu,
                full_dataset=full_dataset, mode="lanczos",
                max_steps=100, tol=1e-2)
            if only_vals:
                eig_dict[k] = eigenvals
            else:
                eig_dict[k] = (eigenvals, eigenvecs)
        except Exception:
            print("Error for net {}.".format(k))
    return eig_dict
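# Example usage (names are hypothetical): compare the top Hessian spectra of
# two checkpoints of the same architecture on a shared training loader.
models = {'init': model_init, 'final': model_final}
eig_dict = get_models_eig(models, train_loader, test_loader,
                          torch.nn.functional.cross_entropy,
                          num_eigenthings=5, device='cuda')
for name, vals in eig_dict.items():
    print(name, vals)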
def hessian(model, pre_params, loader_train, data_length, device, criterion,
            optimizer, scheduler, print_freq, print_logger, step, batch_size,
            epochs=1, use_top5=False, verbose=True, num_eigenthings=20,
            mode="power_iter", num_steps=500, max_samples=512, momentum=0.0,
            full_dataset=True):
    eigenvals, eigenvecs = compute_hessian_eigenthings(
        model,
        loader_train,
        criterion,
        num_eigenthings,
        mode=mode,
        power_iter_steps=num_steps,
        max_samples=max_samples,
        momentum=momentum,
        full_dataset=full_dataset,
        use_gpu=True,
    )
    return eigenvals, eigenvecs
trainloader, testloader = dataloader.load_dataset(
    args.dataset, args.datapath, args.batch_size, args.threads, args.raw_data,
    args.data_split, args.split_idx, args.trainloader, args.testloader)

#--------------------------------------------------------------------------
# Setup loss function
#--------------------------------------------------------------------------
if args.loss_name == 'crossentropy':
    loss = torch.nn.functional.cross_entropy
else:
    raise Exception('Add your loss function here')

#--------------------------------------------------------------------------
# Start the computation
#--------------------------------------------------------------------------
# positional arguments after num_eigenthings are full_dataset=False,
# mode="power_iter", use_gpu=True, and a version-dependent sample limit
eigenvals, eigenvecs = compute_hessian_eigenthings(
    net, trainloader, loss, args.num_eigenthings,
    False, "power_iter", True, args.batch_size)

#--------------------------------------------------------------------------
# Save results
#--------------------------------------------------------------------------
sio.savemat(
    args.model_folder + '/eigendata_' + str(epoch) + '.mat',
    mdict={'eigenvals': eigenvals, 'eigenvecs': eigenvecs})
            os.path.join(args.dir,
                         'mu_' + str(args.mu) + '-projection_diff_new.txt'))
        f.write(args.resume + '\t' +
                " ".join(map(str, pre_calculated_eigen_info['tmp'])) + "\t")
        f.write(' '.join([str(j) for j in result]))
        f.write('\n')
    else:
        while True:
            time_start = time.time()
            eigenvals, eigenvecs = compute_hessian_eigenthings(
                model,
                loaders['train'],
                criterion,
                num * 3,
                mode=args.mode,
                # power_iter_steps=args.num_steps,
                max_samples=args.max_samples,
                # momentum=args.momentum,
                full_dataset=args.full_dataset,
                use_gpu=args.cuda,
            )
            # keep the last `num` eigenpairs
            last = -np.array(range(1, num + 1))
            tmp = eigenvals[last]
            tmp_eigen = eigenvecs[last, :]
            np.savez(previous_calculated_eigen_file, tmp=tmp,
                     tmp_eigen=tmp_eigen)
            # if none of the kept eigenvalues is negative, collect the results
            if tmp[tmp < 0].size == 0:
                result = []
                for i in range(0, num):
                    result.append(