def minimize_lbfgs(self, objective: str):
    """Minimize the selected off-policy-evaluation loss over ``w`` with L-BFGS.

    Draws a fresh random weight vector ``w`` of shape (dim_w, 1), runs
    ``n_iterations`` outer calls to ``optimizer_w.step`` (each itself a full
    L-BFGS solve with strong-Wolfe line search), tracks the lowest loss seen
    across iterations, and returns ``self.w_estimator`` evaluated at the best
    ``w`` found.

    Args:
        objective: selects the loss computed inside ``closure`` — one of
            'bias' / 'bias_opt_cf' (squared bias only),
            'bias_td' / 'bias_td_opt_cf' (bias + regularized TD term), or
            'bias_td_var' / 'bias_td_var_opt_cf' (additionally adds a
            bootstrap variance penalty).

    Returns:
        ``self.w_estimator(w_min)`` — the estimator value at the
        loss-minimizing ``w``.
    """
    w = torch.rand(self.dim_w, 1, dtype=dtype, requires_grad=True)
    optimizer_w = LBFGS([w], lr=1, max_iter=10000, max_eval=15000,
                        tolerance_grad=1e-09, tolerance_change=1e-11,
                        history_size=100, line_search_fn='strong_wolfe')
    n_iterations = 10
    L_min = 1e10; i_min = 0;  # best loss so far and the iteration it occurred at
    trailing_grad_norm = 0    # running mean of ||grad w|| over iterations
    trailing_objective = 0    # running mean of the loss over iterations
    if objective == 'bias_td_var_opt_cf' or objective == 'bias_td_var':
        # Per-trajectory discounted, importance-weighted reward features;
        # shape is (dim_w, n_trajectories) after the final transpose.
        # NOTE(review): assumes self.s / self.a / self.r are
        # (n_trajectories, horizon) tensors — confirm against the data loader.
        trajectory_reward_feature = ((self.discount * self.rho[self.s, self.a] * self.r)[:, :, None]
                                     * self.phi_w(self.s)).sum(dim=1).t()
        trajectory_reward_feature_hat = ((self.discount * self.rho_hat[self.s, self.a] * self.r)[:, :, None]
                                         * self.phi_w(self.s)).sum(dim=1).t()
        # Create k bootstrap resamples of the mean reward feature, used below
        # to estimate the estimator's variance.
        k = 5000  # self.n_trajectories
        average_reward_feature = torch.zeros(self.dim_w, k, dtype=dtype)
        for i in range(k):
            # Resample trajectory indices uniformly with replacement.
            idx = torch.multinomial(torch.ones(self.n_trajectories) / self.n_trajectories,
                                    self.n_trajectories, replacement=True)
            # average_reward_feature[:,i] = torch.mean(trajectory_reward_feature[:,idx]/self.horizon_normalization, dim=1)
            average_reward_feature[:, i] = torch.mean(trajectory_reward_feature[:, idx], dim=1)

    def closure():
        """Zero grads, compute the objective-specific loss at ``w``, backprop, return it."""
        optimizer_w.zero_grad()
        f_w = torch.mm(w.t(), self.X_bias) + self.y_bias.t()
        if objective == 'bias_opt_cf' or objective == 'bias':
            # Plain squared-bias objective: f_w f_w^T (a 1x1 matrix).
            loss = torch.mm(f_w, f_w.t())
        elif objective == 'bias_td_opt_cf' or objective == 'bias_td':
            loss = (torch.abs(torch.mm(f_w, self.v0))
                    + math.sqrt(self.reg * torch.mm(torch.mm(f_w, self.M_inv), f_w.t()))) ** 2
        elif objective == 'bias_td_var_opt_cf' or objective == 'bias_td_var':
            bias = (torch.abs(torch.mm(f_w, self.v0))
                    + math.sqrt(self.reg * torch.mm(torch.mm(f_w, self.M_inv), f_w.t()))) ** 2
            # Bootstrap estimate of the variance of the w-weighted estimator.
            variance = 1 / 2 * torch.var(torch.mm(w.t(), average_reward_feature))  # / self.horizon_normalization**2
            # variance = 1.0/k*((torch.mm(w.t(), average_reward_feature) - torch.mean(torch.mm(w.t(), trajectory_reward_feature)))**2).sum()
            loss = bias + variance
        loss.backward()
        return loss

    # pdb.set_trace()
    for i in range(n_iterations):
        L = optimizer_w.step(closure)
        # Incremental running mean: after i+1 iterations this equals the
        # average of all losses seen so far.
        trailing_objective = 1 / (i + 1) * L + i / (i + 1) * trailing_objective
        if L < L_min:
            L_min = L; w_min = w.clone().detach(); i_min = i
        trailing_grad_norm = 1 / (i + 1) * torch.norm(w.grad) + i / (i + 1) * trailing_grad_norm
        w_estimator = self.w_estimator(w)
        if i % 100 == 0 and self.config.print_progress:
            print('\n')
            print('opt objective', objective)
            print('iteration ', i)
            print('trailing objective:', trailing_objective)
            print('current w estimator: ', w_estimator)
            print('reg:', self.reg)
            print('current objective:', L)
            print('min objective:', L_min)
            print('min iteration:', i_min)
            print('w min estimator:', self.w_estimator(w_min))
    return self.w_estimator(w_min)
class LBFGSNoisyOptimizer(BaseOptimizer):
    """L-BFGS optimizer driven by a noisy function/gradient oracle.

    Each ``_step`` re-evaluates the oracle at a fresh leaf copy of the
    current point, takes one quasi-Newton step (with optional Wolfe or
    Armijo line search), then feeds the post-step gradient back into the
    L-BFGS curvature update. The optimizer state before each step is
    snapshotted so ``reverse_optimizer`` can roll the step back.
    """

    def __init__(self,
                 oracle: BaseConditionalGenerationOracle,
                 x: torch.Tensor,
                 lr: float = 1e-1,
                 memory_size: int = 5,
                 line_search='Wolfe',
                 lr_algo='None',
                 *args, **kwargs):
        """
        Args:
            oracle: noisy function/gradient oracle.
            x: initial parameter tensor.
            lr: base learning rate (used when the base class provides no
                ``_x_step``).
            memory_size: L-BFGS history size.
            line_search: 'Wolfe', 'Armijo' or 'None'.
            lr_algo: learning-rate scaling rule — 'None' (constant
                ``_x_step``), 'Grad' (divide by the gradient norm) or
                'Dim' (divide by the sqrt of the 95% chi^2 quantile of
                the problem dimension).

        Raises:
            ValueError: if ``lr_algo`` is not one of 'None', 'Grad', 'Dim'.
        """
        super().__init__(oracle, x, *args, **kwargs)
        self._line_search = line_search
        self._lr = lr
        self._alpha_k = None
        self._lr_algo = lr_algo  # None, grad, dim
        if lr_algo not in ("None", "Grad", "Dim"):
            # BUG FIX: the original constructed ValueError(...) without
            # raising it, so invalid values were silently accepted.
            raise ValueError("lr_algo is not right")
        if self._x_step:
            self._optimizer = LBFGS(params=[self._x],
                                    lr=self._x_step / 10.,
                                    line_search=line_search,
                                    history_size=memory_size)
        else:
            self._optimizer = LBFGS(params=[self._x],
                                    lr=self._lr,
                                    line_search=line_search,
                                    history_size=memory_size)

    def _step(self):
        """Take one noisy L-BFGS step.

        Returns:
            SUCCESS if the search-direction norm fell below the tolerance,
            COMP_ERROR if any of x / f / d became non-finite, else None
            (meaning: keep iterating).
        """
        # Work on a fresh leaf tensor so autograd state from previous steps
        # does not leak into this evaluation.
        x_k = self._x.detach().clone()
        x_k.requires_grad_(True)
        self._optimizer.param_groups[0]['params'][0] = x_k
        init_time = time.time()
        f_k = self._oracle.func(x_k, num_repetitions=self._num_repetitions)
        g_k = self._oracle.grad(x_k, num_repetitions=self._num_repetitions)
        grad_normed = g_k  # (g_k / g_k.norm())
        # Snapshot the optimizer state so reverse_optimizer() can roll back.
        self._state_dict = copy.deepcopy(self._optimizer.state_dict())
        if self._lr_algo == "None":
            self._optimizer.param_groups[0]['lr'] = self._x_step
        elif self._lr_algo == "Grad":
            self._optimizer.param_groups[0]['lr'] = self._x_step / g_k.norm().item()
        elif self._lr_algo == "Dim":
            self._optimizer.param_groups[0]['lr'] = self._x_step / np.sqrt(
                chi2.ppf(0.95, df=len(g_k)))

        # Closure re-evaluating the (noisy) objective, used by the line search.
        def closure():
            self._optimizer.zero_grad()
            loss = self._oracle.func(x_k, num_repetitions=self._num_repetitions)
            return loss

        # Two-loop recursion computes the quasi-Newton search direction.
        p = self._optimizer.two_loop_recursion(-grad_normed)
        options = {
            'closure': closure,
            'current_loss': f_k,
            'interpolate': False
        }
        if self._line_search == 'Wolfe':
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            f_k, d_k, lr = lbfg_opt[0], lbfg_opt[1], lbfg_opt[2]
        elif self._line_search == 'Armijo':
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            f_k, lr = lbfg_opt[0], lbfg_opt[1]
            d_k = -g_k
        elif self._line_search == 'None':
            # self._optimizer.param_groups[0]['lr'] = 1.
            d_k = -g_k
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            lr = lbfg_opt
        # Refresh the gradient at the new point and update the curvature pairs.
        g_k = self._oracle.grad(x_k, num_repetitions=self._num_repetitions)
        grad_normed = g_k  # (g_k / g_k.norm())
        self._optimizer.curvature_update(grad_normed, eps=0.2, damping=False)
        self._lbfg_opt = lbfg_opt
        grad_norm = d_k.norm().item()
        self._x = x_k
        super()._post_step(init_time=init_time)
        if grad_norm < self._tolerance:
            return SUCCESS
        if not (torch.isfinite(x_k).all()
                and torch.isfinite(f_k).all()
                and torch.isfinite(d_k).all()):
            return COMP_ERROR

    def reverse_optimizer(self, **kwargs):
        """Restore the inner optimizer to the state saved before the last ``_step``."""
        self._optimizer.load_state_dict(self._state_dict)