def optimize_para(wrapper, param, target, criterion, num_step, save_prefix=None, res=False):
    """
    wrapper: image = wrapper(z / w / w+): an interface for a generator forward pass.
    param: z / w / w+
    target: (1, C, H, W)
    criterion: loss(pred, target)
    """
    # move to the device first so the optimized tensor stays a leaf
    param = param.to(device).requires_grad_()
    optimizer = FullBatchLBFGS([param], lr=.1, line_search='Wolfe')
    iter_count = [0]

    def closure():
        # optimization step: forward pass through the generator, then the loss
        iter_count[0] += 1
        optimizer.zero_grad()
        image = wrapper(param)
        loss = criterion(image, target)
        if iter_count[0] % 250 == 0 and save_prefix is not None:
            # visualization code
            print('iter count {} loss {:.4f}'.format(iter_count[0], loss.item()))
            iter_result = image.data.clamp_(-1, 1)
            save_images(iter_result, save_prefix + '_%d' % iter_count[0])
        return loss

    loss = closure()
    loss.backward()
    while iter_count[0] <= num_step:
        options = {'closure': closure, 'max_ls': 10}
        loss, _, lr, _, F_eval, G_eval, _, _ = optimizer.step(options)

    image = wrapper(param)
    return param, image
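# A hypothetical call of optimize_para (the names `generator`, `target_img`, and
# `device` are placeholders introduced here for illustration, not part of the
# snippet above): project a target image into the latent space with an L2 loss.
z0 = torch.randn(1, 512, device=device)
criterion = torch.nn.MSELoss()
latent, recon = optimize_para(lambda z: generator(z), z0, target_img, criterion,
                              num_step=500, save_prefix='proj')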
def train(train_x, train_y,
          n_devices, output_device, checkpoint_size,
          preconditioner_size, n_training_iter):
    likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
    model = ExactGPModel(train_x, train_y, likelihood, n_devices,
                         output_device).to(output_device)
    model.train()
    likelihood.train()
    optimizer = FullBatchLBFGS(model.parameters(), lr=0.1)
    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
            gpytorch.settings.max_preconditioner_size(preconditioner_size):

        def closure():
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            return loss

        loss = closure()
        loss.backward()

        for i in range(n_training_iter):
            options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
            loss, _, _, _, _, _, _, fail = optimizer.step(options)

            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, n_training_iter, loss.item(),
                model.covar_module.module.base_kernel.lengthscale.item(),
                model.likelihood.noise.item()
            ))

            if fail:
                print('Convergence reached!')
                break

    print(f"Finished training on {train_x.size(0)} data points using {n_devices} GPUs.")
    return model, likelihood
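# A hypothetical invocation of the multi-GPU training routine above (the sizes
# are placeholder values chosen for illustration, not taken from the original code):
model, likelihood = train(train_x, train_y,
                          n_devices=n_devices, output_device=output_device,
                          checkpoint_size=10000, preconditioner_size=100,
                          n_training_iter=20)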
def train_gp_model_LBFGS(self, train_x, train_y):
    # Use full-batch L-BFGS optimizer
    optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)

    # Access parameters: self.likelihood.noise_covar.raw_noise
    self.likelihood.noise_covar.initialize(raw_noise=0.)
    self.mean_module.initialize(constant=0.)
    para_to_check = self.covar_module.base_kernel
    para_to_check.initialize(raw_lengthscale=0.)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self)

    # define closure
    def closure():
        optimizer.zero_grad()
        output = self(train_x.double())
        loss = -mll(output, train_y.double()).sum()
        return loss

    loss = closure()
    loss.backward()

    training_iter = 20
    for i in range(training_iter):
        # perform step and update curvature
        options = {
            'closure': closure,
            'current_loss': loss,
            'max_ls': 20,
            'eta': 2
        }
        loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = optimizer.step(options)

        if self.verbose:
            logger.info(
                'Iter %d/%d - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - '
                'Grad Evals: %0.0f - fail: %0.0f' %
                (i + 1, training_iter, loss.item(), lr, F_eval, G_eval, fail))

        # guard against the lengthscale going NaN during the line search
        if torch.isnan(para_to_check.raw_lengthscale.data):
            para_to_check.initialize(raw_lengthscale=1E-6)
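# An alternative to re-initializing on NaN (a sketch, not the snippet's own
# approach): constrain the raw lengthscale away from zero when the kernel is
# built, using GPyTorch's constraint API. `covar_module` here is illustrative only.
covar_module = gpytorch.kernels.ScaleKernel(
    gpytorch.kernels.RBFKernel(
        lengthscale_constraint=gpytorch.constraints.GreaterThan(1e-6)))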
buildPhi_basic = gprh.buildPhi(m, type=meastype, tun=tun1)

if integral:
    closure_basic = gprh.gp_closure(model_basic, meastype, buildPhi_basic,
                                    lossfu_basic, n, dom_points, train_y)
else:
    closure_basic = gprh.gp_closure(model_basic, meastype, buildPhi_basic,
                                    lossfu_basic, n, dom_points, train_y,
                                    train_x=train_x)
loss_basic = closure_basic()

for i in range(training_iterations_basic):
    options = {'line_search': True, 'closure': closure_basic, 'max_ls': 3,
               'ls_debug': False, 'inplace': False, 'interpolate': False,
               'eta': 3, 'c1': 1e-4, 'decrease_lr_on_max_ls': 0.1,
               'increase_lr_on_min_ls': 5}

    optimiser_basic.zero_grad()  # zero gradients
    loss_basic.backward()  # Backprop derivatives
    loss_basic, lr, ls_step = optimiser_basic.step(options=options)  # compute new loss

    # print
    gprh.optiprint(i, training_iterations_basic, loss_basic.item(), lr, ls_step,
                   model_basic, buildPhi_basic.L)

if integral:
    test_f = gprh.compute_and_save(model_basic, meastype, dataname, train_y, n,
                                   X, Y, Z, ntx, nty, test_x, dom_points, m, dim,
                                   mt, noise_std, lossfu_basic, buildPhi_basic,
                                   optimiser_basic, training_iterations_basic,
                                   joint=True, x0=x0, unitvecs=unitvecs, Rlim=Rlim,
                                   rec_fbp=rec_fbp, err_fbp=err_fbp, basic=True)
if point:
    test_f = gprh.compute_and_save(model_basic, meastype, dataname, train_y, n,
                                   X, Y, Z, ntx, nty, test_x, dom_points, m, dim,
                                   mt, noise_std, lossfu_basic, buildPhi_basic,
                                   optimiser_basic, training_iterations_basic,
                                   joint=True, train_x=train_x, basic=True)

try:  # since plotting might produce an error on remote machines
    vmin = 0
# model.scale2 *= 1.2
options = {
    'closure': closure,
    'max_ls': 5,
    'ls_debug': False,
    'inplace': False,
    'interpolate': False,
    'eta': 3,
    'c1': 1e-4,
    'decrease_lr_on_max_ls': 0.1,
    'increase_lr_on_min_ls': 5
}

optimiser.zero_grad()  # zero gradients
loss.backward()  # propagate derivatives
loss, lr, ls_iters = optimiser.step(options=options)  # compute new loss

# print
gprh.optiprint(i, training_iterations, loss.item(), lr, ls_iters, model, L)

# update phi
phi, sq_lambda, L = buildPhi.getphi(model, m, n, mt, train_x=train_x,
                                    dom_points=dom_points)

# now make predictions
test_f, cov_f = model(y_train=train_y, phi=phi,
# Use full-batch L-BFGS optimizer
optimizer = FullBatchLBFGS(model.parameters())

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# define closure
def closure():
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    return loss

loss = closure()
loss.backward()

training_iter = 10
for i in range(training_iter):
    # perform step and update curvature
    options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
    loss, _, lr, _, F_eval, G_eval, _, _ = optimizer.step(options)

    print('Iter %d/%d - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - '
          'Grad Evals: %0.0f - Log-Lengthscale: %.3f - Log_Noise: %.3f' %
          (i + 1, training_iter, loss.item(), lr, F_eval, G_eval,
           model.covar_module.base_kernel.log_lengthscale.item(),
           model.likelihood.log_noise.item()))
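# A short follow-up sketch (assumes held-out inputs `test_x`, which are not part
# of the snippet above): switch to eval mode and draw posterior predictions with
# the trained hyperparameters.
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    pred = likelihood(model(test_x))
    mean = pred.mean
    lower, upper = pred.confidence_region()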
            ops = opfun(X_train[subsmpl])

            if torch.cuda.is_available():
                tgts = torch.from_numpy(y_train[subsmpl]).cuda().long().squeeze()
            else:
                tgts = torch.from_numpy(y_train[subsmpl]).long().squeeze()

            loss_fn += F.cross_entropy(ops, tgts) * (len(subsmpl) / no_samples)

        return loss_fn

    # perform line search step
    options = {'closure': closure, 'current_loss': obj}
    obj, grad, lr, _, _, _, _, _ = optimizer.step(options)

    # compute statistics
    model.eval()
    train_loss, test_loss, test_acc = compute_stats(X_train, y_train, X_test,
                                                    y_test, opfun, accfun,
                                                    ghost_batch=128)

    # print data
    print('Iter:', n_iter + 1, 'lr:', lr, 'Training Loss:', train_loss,
          'Test Loss:', test_loss, 'Test Accuracy:', test_acc)
def train_gp_model_LBFGS_SGP(self, train_x, train_y):
    # Use full-batch L-BFGS optimizer on mini-batches of the training data
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(
        train_dataset,
        batch_size=int(max(min(train_y.size(0) / 100, 1E4), 100)),
        shuffle=True)

    self.train()
    self.likelihood.train()
    optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)

    # "Loss" for GPs - the variational ELBO
    mll = gpytorch.mlls.VariationalELBO(self.likelihood, self,
                                        num_data=train_y.size(0),
                                        combine_terms=False).cuda()
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[3, 5],
                                                     gamma=0.1)

    training_iter = 20
    num_epochs = 2
    for i in range(num_epochs):
        scheduler.step()
        for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):

            # define closure
            def closure():
                optimizer.zero_grad()
                with gpytorch.settings.use_toeplitz(False):
                    output = self(x_batch.double())
                    log_lik, kl_div, log_prior = mll(output, y_batch.double())
                    loss = -(log_lik - kl_div + log_prior).sum()
                return loss

            loss = closure()
            loss.backward()

            # perform step and update curvature
            options = {
                'closure': closure,
                'current_loss': loss,
                'max_ls': 1,
                'eta': 2
            }
            loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = optimizer.step(options)

            if self.verbose:
                logger.info(
                    'Iter %d[%d/%d] - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - '
                    'Grad Evals: %0.0f - fail: %0.0f' %
                    (i + 1, minibatch_i, len(train_loader),
                     loss.item(), lr, F_eval, G_eval, fail))
# model.scale2 *= 1.2
options = {
    'closure': closure,
    'max_ls': 3,
    'ls_debug': False,
    'inplace': False,
    'interpolate': False,
    'eta': 3,
    'c1': 1e-4,
    'decrease_lr_on_max_ls': 0.1,
    'increase_lr_on_min_ls': 5
}

optimiser.zero_grad()  # zero gradients
loss.backward()  # propagate derivatives
loss, lr, ls_iters = optimiser.step(options=options)  # compute new loss

# print
gprh.optiprint(i, training_iterations, loss.item(), lr, ls_iters, model, L)

# update phi
phi, sq_lambda, L = buildPhi.getphi(model, n, train_x=train_x,
                                    dom_points=dom_points)

# now make predictions
test_f, cov_f = model(y_train=train_y, phi=phi, sq_lambda=sq_lambda, L=L,
def train(self, train_x, train_y, n_devices, output_device, checkpoint_size,
          preconditioner_size, n_training_iter, n_restarts=1):
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.GreaterThan(1e-3)).to(output_device)
    model = ExactGPModel(train_x, train_y, likelihood, n_devices,
                         output_device).to(output_device)
    model.train()
    likelihood.train()
    optimizer = FullBatchLBFGS(model.parameters(), lr=.5)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
            gpytorch.settings.max_preconditioner_size(preconditioner_size):

        def closure():
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            return loss

        loss = closure()
        loss.backward()

        for i in range(n_training_iter):
            options = {
                'closure': closure,
                'current_loss': loss,
                'max_ls': 20
            }
            loss, _, _, _, _, _, _, fail = optimizer.step(options)

            if self.verbosity > 2:
                print_lengthscale = [
                    "%.3f" % p.item()
                    for p in model.covar_module.module.lengthscale
                ]
                print(
                    f'Iter {i+1}/{n_training_iter} - Loss: {"%.3f" % loss.item()} '
                    f'lengthscale: {print_lengthscale} '
                    f'noise: {"%.3f" % model.likelihood.noise.item()}'
                )

            if fail:
                for pname, p in model.named_parameters():
                    print(pname, p.grad)
                if self.verbosity > 2:
                    print('Convergence reached!')
                break

    if self.verbosity > 2:
        print("Finished training on {0} data points using {1} GPUs.".format(
            train_x.size(-2), n_devices))

    return model, likelihood, mll
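# Note: `n_restarts` is accepted above but never used in this snippet. One way it
# could be honored (a sketch under that assumption, not the original author's
# code) is to rerun training and keep the run with the lowest final loss.
def train_with_restarts(self, n_restarts=3, **train_kwargs):
    best_loss, best = float('inf'), None
    for _ in range(n_restarts):
        model, likelihood, mll = self.train(**train_kwargs)
        with torch.no_grad():
            final_loss = -mll(model(train_kwargs['train_x']),
                              train_kwargs['train_y']).item()
        if final_loss < best_loss:
            best_loss, best = final_loss, (model, likelihood, mll)
    return best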
    options = {
        'line_search': True,
        'closure': closure2,
        'max_ls': 3,
        'ls_debug': False,
        'inplace': False,
        'interpolate': False,
        'eta': 3,
        'c1': 1e-4,
        'decrease_lr_on_max_ls': 0.1,
        'increase_lr_on_min_ls': 5
    }

    optimiser_pt.zero_grad()  # zero gradients
    loss2.backward()  # propagate derivatives
    loss2, lr, ls_step = optimiser_pt.step(options=options)  # compute new loss

    # print
    gprh.optiprint(i, training_iterations_pt, loss2.item(), lr, ls_step)

#########################################################
# joint training
#########################################################
print('\n=========Training the joint model=========')

# set appr params
dim = len(m)      # nr of latent outputs
mt = np.prod(m)   # total nr of basis functions

# buildPhi object
if integral:
def closure():
    optimizer.zero_grad()
    loss_fn = model()
    return loss_fn

# perform line search step
options = {
    'closure': closure,
    'current_loss': obj,
    'eta': 2,
    'max_ls': max_ls,
    'interpolate': interpolate,
    'inplace': False
}

if line_search == 'Armijo':
    obj, lr, backtracks, clos_evals, desc_dir, fail = optimizer.step(options=options)

    # compute gradient at new iterate
    obj.backward()
    grad = optimizer._gather_flat_grad()

elif line_search == 'Wolfe':
    obj, grad, lr, backtracks, clos_evals, grad_evals, desc_dir, fail = \
        optimizer.step(options=options)

x_new.copy_(model.x())
func_evals += clos_evals

# compute quantities for checking convergence
grad_norm = torch.norm(grad)
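# A possible continuation (a sketch; the tolerances and the `x_old` buffer are
# assumptions introduced here, not part of the snippet above): report convergence
# when the gradient norm or the step size falls below a threshold, or when the
# line search signals failure.
if grad_norm <= 1e-4 or torch.norm(x_new - x_old) <= 1e-7 or fail:
    print('Converged (or line search failed) with gradient norm %.3e' % grad_norm)
else:
    x_old.copy_(x_new)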