def optimize_para(wrapper, param, target, criterion, num_step, save_prefix=None, res=False):
    """Optimize a latent code so the generator output matches a target image.

    wrapper: image = wrapper(z / w / w+): an interface for a generator forward pass.
    param: z / w / w+ latent tensor to optimize.
    target: (1, C, H, W) target image tensor.
    criterion: loss(pred, target) callable.
    num_step: maximum number of closure evaluations before stopping.
    save_prefix: if given, an intermediate image is saved every 250 evaluations.
    res: unused; kept for backward compatibility with existing callers.

    Returns (param, image): the optimized latent code and the final rendering.
    """
    # Move to device BEFORE enabling grad: `.to()` returns a new, non-leaf
    # tensor, and the optimizer can only update leaf tensors.
    param = param.detach().to(device).requires_grad_()
    optimizer = FullBatchLBFGS([param], lr=.1, line_search='Wolfe')
    iter_count = [0]

    def closure():
        # Count every evaluation; the original never incremented this,
        # which made the outer `while` loop run forever.
        iter_count[0] += 1
        optimizer.zero_grad()
        image = wrapper(param)
        loss = criterion(image, target)
        if iter_count[0] % 250 == 0 and save_prefix is not None:
            # visualization: print the counter VALUE (not the list wrapper)
            print('iter count {} loss {:4f}'.format(iter_count[0], loss.item()))
            iter_result = image.data.clamp_(-1, 1)
            save_images(iter_result, save_prefix + '_%d' % iter_count[0])
        return loss

    # Seed L-BFGS with an initial loss/gradient pair, as FullBatchLBFGS expects.
    loss = closure()
    loss.backward()
    while iter_count[0] <= num_step:
        options = {'closure': closure, 'max_ls': 10}
        loss, _, lr, _, F_eval, G_eval, _, _ = optimizer.step(options)
    image = wrapper(param)
    return param, image
def train(train_x, train_y,
          n_devices, output_device,
          checkpoint_size, preconditioner_size,
          n_training_iter, ):
    """Train an exact multi-GPU GP with full-batch L-BFGS.

    train_x, train_y: full training-set tensors.
    n_devices: number of GPUs to spread the kernel over.
    output_device: device that holds the likelihood and gathers results.
    checkpoint_size: kernel-checkpointing partition size.
    preconditioner_size: max preconditioner rank for the solves.
    n_training_iter: maximum number of L-BFGS iterations.

    Returns (model, likelihood), both left in train mode.
    """
    likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
    # BUG FIX: pass `output_device` to the model; the original passed the
    # undefined name `F`, which raised a NameError at call time (cf. the
    # sibling multi-GPU `train` method, which passes `output_device`).
    model = ExactGPModel(train_x, train_y, likelihood, n_devices,
                         output_device).to(output_device)
    model.train()
    likelihood.train()

    optimizer = FullBatchLBFGS(model.parameters(), lr=0.1)
    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    print("Here 1")
    with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
            gpytorch.settings.max_preconditioner_size(preconditioner_size):

        def closure():
            # Negative MLL over the full training set (debug prints kept).
            print("Zeroing Grads")
            optimizer.zero_grad()
            print("Zeroed Grads")
            output = model(train_x)
            print("Computed output")
            loss = -mll(output, train_y)
            print("Ran closure")
            return loss

        # Seed L-BFGS with an initial loss/gradient pair.
        loss = closure()
        loss.backward()
        print("Ran backward")

        for i in range(n_training_iter):
            options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
            print("Begin Optim")
            loss, _, _, _, _, _, _, fail = optimizer.step(options)

            print('Iter %d/%d - Loss: %.3f lengthscale: %.3f noise: %.3f' % (
                i + 1, n_training_iter, loss.item(),
                model.covar_module.module.base_kernel.lengthscale.item(),
                model.likelihood.noise.item()
            ))

            # `fail` signals a failed line search, i.e. no further progress.
            if fail:
                print('Convergence reached!')
                break

    print(f"Finished training on {train_x.size(0)} data points using {n_devices} GPUs.")
    return model, likelihood
def train_gp_model_LBFGS(self, train_x, train_y):
    """Fit GP hyperparameters by minimizing the negative exact marginal
    log likelihood with a full-batch L-BFGS optimizer (20 iterations)."""
    optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)

    # Reset hyperparameters to a neutral starting point before optimizing.
    # (Raw noise lives on self.likelihood.noise_covar.raw_noise.)
    self.likelihood.noise_covar.initialize(raw_noise=0.)
    self.mean_module.initialize(constant=0.)
    kernel = self.covar_module.base_kernel
    kernel.initialize(raw_lengthscale=0.)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self)

    def closure():
        """Evaluate the negative MLL on the full training set."""
        optimizer.zero_grad()
        return -mll(self(train_x.double()), train_y.double()).sum()

    # Seed L-BFGS with an initial loss/gradient pair.
    loss = closure()
    loss.backward()

    training_iter = 20
    for i in range(training_iter):
        # Perform one step and let the optimizer update its curvature pair.
        step_options = {
            'closure': closure,
            'current_loss': loss,
            'max_ls': 20,
            'eta': 2
        }
        loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = \
            optimizer.step(step_options)

        if self.verbose:
            logger.info(
                'Iter %d/%d - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - Grad Evals: %0.0f - fail: %0.0f'
                % (i + 1, training_iter, loss.item(), lr, F_eval, G_eval,
                   fail))

        # L-BFGS can drive the raw lengthscale to NaN; nudge it back to a
        # tiny positive value so subsequent iterations stay finite.
        if torch.isnan(kernel.raw_lengthscale.data):
            kernel.initialize(raw_lengthscale=1E-6)
######### step 1 # set appr params m = [90,90] # nr of basis functions in each latent direction: Change this to add latent outputs covfunc1 = gprh.covfunc('matern',nu=2.5) tun1 = 30 model_basic = gpnets.gpnet2_2_1(sigma_f=1, lengthscale=[1], sigma_n=1, covfunc=covfunc1) # pure GP training_iterations_basic = 1 # loss function lossfu_basic = gprh.NegLOOCrossValidation_phi_noBackward(model_basic.gp.covfunc) # optimiser optimiser_basic = FullBatchLBFGS(model_basic.parameters(), lr=1, history_size=10) ######### step 2/3 covfunc2 = gprh.covfunc('matern',nu=2.5) tun = 30 # scaling parameter for L model = gpnets.gpnet2_1_11(sigma_f=1,lengthscale=[1],sigma_n=1, covfunc=covfunc2) # GP/NN m3 = [150] ######### step 2 ntp = 100 # number of training points (in each direction) training_iterations2 = 6 optimiser2 = FullBatchLBFGS(model.parameters(), lr=1, history_size=10) regweight2 = 0.0 ######### step 3
# loss function lossfu = gprh.NegLOOCrossValidation_phi_noBackward() # lossfu = gprh.NegMarginalLogLikelihood_phi_noBackward() # buildPhi object int_method = 1 # 1) trapezoidal, 2) simpsons standard, 3) simpsons 3/8 ni = 200 # nr of intervals in numerical integration tun = 4 # scaling parameter for L (nr of "std":s) buildPhi = gprh.buildPhi(m, type=meastype, ni=ni, int_method=int_method, tun=tun) # optimiser optimiser = FullBatchLBFGS(model.parameters(), lr=1, history_size=10) # closure: should return the loss def closure(): global L optimiser.zero_grad() # zero gradients phi, sq_lambda, L = buildPhi.getphi(model, m, n, mt, train_x=train_x, dom_points=dom_points) return lossfu(model.gp.log_sigma_f, model.gp.log_lengthscale, model.gp.log_sigma_n, phi, train_y, sq_lambda)
def forward(self, x):
    """Exact GP prior: mean and covariance evaluated at the inputs x."""
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x, train_y, likelihood)

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use full-batch L-BFGS optimizer
optimizer = FullBatchLBFGS(model.parameters())

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# define closure: evaluates the negative MLL on the full training set
def closure():
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    return loss

# Seed the optimizer with an initial loss/gradient pair.
loss = closure()
loss.backward()
opfun = lambda X: model.forward(torch.from_numpy(X)) # Forward pass through the network given the input if (cuda): predsfun = lambda op: np.argmax(op.cpu().data.numpy(), 1) else: predsfun = lambda op: np.argmax(op.data.numpy(), 1) # Do the forward pass, then compute the accuracy accfun = lambda op, y: np.mean(np.equal(predsfun(op), y.squeeze())) * 100 #%% Define optimizer optimizer = FullBatchLBFGS(model.parameters(), lr=1, history_size=10, line_search='Wolfe', debug=True) #%% Main training loop no_samples = X_train.shape[0] # compute initial gradient and objective grad, obj = get_grad(optimizer, X_train, y_train, opfun) # main loop for n_iter in range(max_iter): # training mode model.train()
training_iterations = 5000

# Restore a training checkpoint; the tuple layout depends on the
# measurement type (integral vs. point data).
if integral:
    (model, dataname, train_y, n, x0, unitvecs, Rlim, X, Y, Z, rec_fbp, err_fbp, ntx, nty,
     test_x, dom_points, m, dim, mt, test_f, cov_f, noise_std, lossfu, buildPhi, opti_state, it_number) = \
        torch.load(filepath)
if point:
    (model, dataname, train_y, n, train_x, X, Y, Z, ntx, nty,
     test_x, dom_points, m, dim, mt, test_f, cov_f, noise_std, lossfu, buildPhi, opti_state, it_number) = \
        torch.load(filepath)

# optimiser
optimiser = FullBatchLBFGS(model.parameters())

# make sure it's the same: restore the optimiser state saved in the checkpoint
optimiser.__setstate__(opti_state)

if integral:
    closure = gprh.gp_closure(model, meastype, buildPhi, lossfu, n, dom_points, train_y, regweight=regweight)
else:
    # NOTE(review): statement truncated here — the remaining arguments to
    # gp_closure lie outside the visible excerpt.
    closure = gprh.gp_closure(model, meastype, buildPhi,
def train_gp_model_LBFGS_SGP(self, train_x, train_y):
    """Fit a variational (sparse) GP by minibatch L-BFGS on the ELBO.

    train_x, train_y: full training tensors; they are wrapped in a
    DataLoader with a batch size clamped to [100, 1e4] (~1% of the data).
    Runs 2 epochs; logs per-minibatch progress when self.verbose is set.
    """
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(
        train_dataset,
        batch_size=int(max(min(train_y.size(0) / 100, 1E4), 100)),
        shuffle=True)

    self.train()
    self.likelihood.train()

    optimizer = FullBatchLBFGS(self.parameters(), lr=1E-1)

    # "Loss" for sparse GPs - the variational ELBO, with the terms kept
    # separate so the closure can recombine them explicitly.
    mll = gpytorch.mlls.VariationalELBO(
        self.likelihood, self, num_data=train_y.size(0),
        combine_terms=False).cuda()

    # NOTE(review): milestones [3, 5] are never reached with num_epochs=2,
    # so the schedule is currently a no-op — confirm intended epoch count.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[3, 5], gamma=0.1)

    num_epochs = 2
    for i in range(num_epochs):
        for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):

            def closure():
                """Negative ELBO on the current minibatch."""
                optimizer.zero_grad()
                with gpytorch.settings.use_toeplitz(False):
                    output = self(x_batch.double())
                    log_lik, kl_div, log_prior = mll(
                        output, y_batch.double())
                    loss = -(log_lik - kl_div + log_prior).sum()
                return loss

            # Seed L-BFGS with an initial loss/gradient pair.
            loss = closure()
            loss.backward()

            # perform step and update curvature
            options = {
                'closure': closure,
                'current_loss': loss,
                'max_ls': 1,
                'eta': 2
            }
            loss, g_new, lr, _, F_eval, G_eval, desc_dir, fail = optimizer.step(
                options)

            if self.verbose:
                logger.info(
                    'Iter %d[%d/%d] - Loss: %.3f - LR: %.3f - Func Evals: %0.0f - Grad Evals: %0.0f - fail: %0.0f'
                    % (i + 1, minibatch_i, len(train_loader), loss.item(),
                       lr, F_eval, G_eval, fail))

        # BUG FIX: step the LR scheduler AFTER the epoch's optimizer steps.
        # The original called scheduler.step() at the top of the epoch,
        # before any optimizer.step(), which PyTorch warns against and
        # which shifts the whole schedule by one epoch.
        scheduler.step()
def train(self, train_x, train_y, n_devices, output_device, checkpoint_size,
          preconditioner_size, n_training_iter, n_restarts=1):
    """Fit an exact multi-GPU GP with full-batch L-BFGS.

    Builds a GaussianLikelihood (noise bounded below by 1e-3) and an
    ExactGPModel spread over `n_devices`, then minimizes the negative
    marginal log likelihood for up to `n_training_iter` iterations under
    kernel checkpointing and a capped preconditioner size.

    Returns (model, likelihood, mll), with the modules in train mode.
    """
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.GreaterThan(1e-3)).to(
            output_device)
    model = ExactGPModel(train_x, train_y, likelihood, n_devices,
                         output_device).to(output_device)

    model.train()
    likelihood.train()

    optimizer = FullBatchLBFGS(model.parameters(), lr=.5)
    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
            gpytorch.settings.max_preconditioner_size(preconditioner_size):

        def closure():
            """Negative MLL over the full training set."""
            optimizer.zero_grad()
            return -mll(model(train_x), train_y)

        # Seed L-BFGS with an initial loss/gradient pair.
        loss = closure()
        loss.backward()

        for it in range(n_training_iter):
            loss, _, _, _, _, _, _, fail = optimizer.step(
                {'closure': closure, 'current_loss': loss, 'max_ls': 20})

            if self.verbosity > 2:
                print_lengthscale = [
                    "%.3f" % p.item()
                    for p in model.covar_module.module.lengthscale
                ]
                print('Iter %d/%d - Loss: %.3f lengthscale: %s noise: %.3f'
                      % (it + 1, n_training_iter, loss.item(),
                         print_lengthscale,
                         model.likelihood.noise.item()))

            if fail:
                # Line search failed: dump gradients for inspection, stop.
                for pname, p in model.named_parameters():
                    print(pname, p.grad)
                if self.verbosity > 2:
                    print('Convergence reached!')
                break

    if self.verbosity > 2:
        print(
            "Finished training on {0} data points using {1} GPUs.".format(
                train_x.size(-2), n_devices))
    return model, likelihood, mll
# dom_points dom_points = test_x # STEP 1 # set appr params m2 = [150] tun2 = 30 diml = len(m2) # nr of latent outputs training_iterations = 200 regweight = 0.0001 mybestnet = gpnets.gpnet1_1_4(sigma_f=1,lengthscale=[1],sigma_n=1,covfunc=gprh.covfunc(type='matern',nu=2.5)) optimiser2 = FullBatchLBFGS(mybestnet.parameters(), lr=1, history_size=10) # STEP 3 int_method = 2 # 1) trapezoidal, 2) simpsons standard, 3) simpsons 3/8 ni = 200 # nr of intervals in numerical integration training_iterations2 = 80 regweight2 = 0.1 # optimiser optimiser3 = FullBatchLBFGS(mybestnet.parameters(), lr=1, history_size=10) lossfu3 = gprh.NegLOOCrossValidation_phi_noBackward(mybestnet.gp.covfunc) # lossfu3 = gprh.NegMarginalLogLikelihood_phi_noBackward(mybestnet.gp.covfunc)
#%% For loop through all problems to solve for problemName in sorted(problems): #%% Create instance of problem if problemName in Ns: sifParams = {'N': Ns[problemName]} else: sifParams = {} problem = pycutest.import_problem(problemName, sifParams=sifParams) model = CUTEstProblem(problem) #%% Define optimizer optimizer = FullBatchLBFGS(model.parameters(), lr=1, history_size=history_size, line_search=line_search, debug=True) #%% Main training loop if (out): print( '===================================================================================' ) print('Solving ' + problemName) print( '===================================================================================' ) print( ' Iter: | F | ||g|| | |x - y|/|x| | F Evals | alpha '