# Method of a larger estimator class. Assumes `import math`, `import torch`,
# `from torch.optim import LBFGS`, and a module-level `dtype` (e.g. torch.double).
def minimize_lbfgs(self, objective):
        w = torch.rand(self.dim_w, 1, dtype=dtype, requires_grad=True)
        optimizer_w = LBFGS([w], lr=1, max_iter=10000, max_eval=15000, tolerance_grad=1e-09, tolerance_change=1e-11, history_size=100, line_search_fn='strong_wolfe')
        n_iterations = 10
        L_min = 1e10
        i_min = 0
        w_min = w.clone().detach()  # best iterate seen so far
        trailing_grad_norm = 0
        trailing_objective = 0
        if objective == 'bias_td_var_opt_cf' or objective == 'bias_td_var':
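            # per-trajectory discounted, importance-weighted reward features (dim_w x n_trajectories)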
            trajectory_reward_feature = ((self.discount*self.rho[self.s,self.a]*self.r)[:,:,None]*self.phi_w(self.s)).sum(dim=1).t()
            trajectory_reward_feature_hat = ((self.discount*self.rho_hat[self.s,self.a]*self.r)[:,:,None]*self.phi_w(self.s)).sum(dim=1).t()
            #* creating bootstrap samples
            k = 5000  # number of bootstrap resamples (alternative: self.n_trajectories)
            average_reward_feature = torch.zeros(self.dim_w,k, dtype=dtype)
            for i in range(k):
                idx = torch.multinomial(torch.ones(self.n_trajectories)/self.n_trajectories,self.n_trajectories, replacement = True)
                # average_reward_feature[:,i] = torch.mean(trajectory_reward_feature[:,idx]/self.horizon_normalization, dim=1)
                average_reward_feature[:,i] = torch.mean(trajectory_reward_feature[:,idx], dim=1)

        
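        # LBFGS re-evaluates this closure several times per step (including during the strong-Wolfe line search)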
        def closure():
            optimizer_w.zero_grad()
            f_w = torch.mm(w.t(), self.X_bias) + self.y_bias.t()
            if objective == 'bias_opt_cf' or objective == 'bias':
                loss = torch.mm(f_w, f_w.t())
            elif objective == 'bias_td_opt_cf' or objective == 'bias_td':
                loss = (torch.abs(torch.mm(f_w, self.v0)) + torch.sqrt(self.reg*torch.mm(torch.mm(f_w, self.M_inv), f_w.t())))**2
            elif objective == 'bias_td_var_opt_cf' or objective == 'bias_td_var':
                bias = (torch.abs(torch.mm(f_w, self.v0)) + torch.sqrt(self.reg*torch.mm(torch.mm(f_w, self.M_inv), f_w.t())))**2
                variance = 1/2*torch.var(torch.mm(w.t(), average_reward_feature)) #/ self.horizon_normalization**2
                # variance = 1.0/k*((torch.mm(w.t(), average_reward_feature) - torch.mean(torch.mm(w.t(), trajectory_reward_feature)))**2).sum()
                loss = bias + variance
            loss.backward()
            return loss
        for i in range(n_iterations):
            L = optimizer_w.step(closure)
            trailing_objective = 1/(i+1)*L + i/(i+1)*trailing_objective
            if L < L_min:
                L_min = L
                w_min = w.clone().detach()
                i_min = i
            trailing_grad_norm = 1/(i+1)*torch.norm(w.grad) + i/(i+1)*trailing_grad_norm
            w_estimator = self.w_estimator(w)
            if i % 100 == 0 and self.config.print_progress:
                print('\n')
                print('opt objective', objective)
                print('iteration ', i)
                print('trailing objective:', trailing_objective)
                print('current w estimator: ', w_estimator)
                print('reg:', self.reg)
                print('current objective:', L)
                print('min objective:', L_min)
                print('min iteration:', i_min)
                print('w min estimator:', self.w_estimator(w_min))
        return self.w_estimator(w_min)
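The closure-based pattern above is the standard way to drive torch.optim.LBFGS: the optimizer calls the closure repeatedly per step, so the closure must zero stale gradients, rebuild the loss at the current parameters, and backpropagate. Below is a minimal, self-contained sketch of the same pattern; the quadratic objective and the names A, b, dim_w are hypothetical stand-ins for the bias/variance terms used above.

import torch
from torch.optim import LBFGS

dtype = torch.double
dim_w = 4
A = torch.randn(dim_w, dim_w, dtype=dtype)
b = torch.randn(dim_w, 1, dtype=dtype)

w = torch.rand(dim_w, 1, dtype=dtype, requires_grad=True)
optimizer_w = LBFGS([w], lr=1, max_iter=100, line_search_fn='strong_wolfe')

def closure():
    # zero stale gradients, rebuild the loss at the current w, backprop, return the loss
    optimizer_w.zero_grad()
    residual = torch.mm(A, w) - b
    loss = torch.mm(residual.t(), residual)
    loss.backward()
    return loss

for _ in range(10):
    L = optimizer_w.step(closure)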
Example 2
# Assumes project-level imports for BaseOptimizer, BaseConditionalGenerationOracle,
# the LBFGS implementation used below (with two_loop_recursion / curvature_update),
# and the SUCCESS / COMP_ERROR status codes.
import copy
import time

import numpy as np
import torch
from scipy.stats import chi2


class LBFGSNoisyOptimizer(BaseOptimizer):
    def __init__(self,
                 oracle: BaseConditionalGenerationOracle,
                 x: torch.Tensor,
                 lr: float = 1e-1,
                 memory_size: int = 5,
                 line_search='Wolfe',
                 lr_algo='None',
                 *args,
                 **kwargs):
        super().__init__(oracle, x, *args, **kwargs)
        self._line_search = line_search
        self._lr = lr
        self._alpha_k = None
        self._lr_algo = lr_algo  # "None", "Grad", or "Dim"
        if lr_algo not in ["None", "Grad", "Dim"]:
            raise ValueError("lr_algo must be one of 'None', 'Grad', 'Dim'")
        if self._x_step:
            self._optimizer = LBFGS(params=[self._x],
                                    lr=self._x_step / 10.,
                                    line_search=line_search,
                                    history_size=memory_size)
        else:
            self._optimizer = LBFGS(params=[self._x],
                                    lr=self._lr,
                                    line_search=line_search,
                                    history_size=memory_size)

    def _step(self):
        x_k = self._x.detach().clone()
        x_k.requires_grad_(True)
        self._optimizer.param_groups[0]['params'][0] = x_k
        init_time = time.time()
        f_k = self._oracle.func(x_k, num_repetitions=self._num_repetitions)
        g_k = self._oracle.grad(x_k, num_repetitions=self._num_repetitions)
        grad_normed = g_k  # (g_k / g_k.norm())
        self._state_dict = copy.deepcopy(self._optimizer.state_dict())

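        # choose the step length: fixed, scaled by the inverse gradient norm, or by a chi-square dimension heuristic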
        if self._lr_algo == "None":
            self._optimizer.param_groups[0]['lr'] = self._x_step
        elif self._lr_algo == "Grad":
            self._optimizer.param_groups[0]['lr'] = self._x_step / g_k.norm(
            ).item()
        elif self._lr_algo == "Dim":
            self._optimizer.param_groups[0]['lr'] = self._x_step / np.sqrt(
                chi2.ppf(0.95, df=len(g_k)))
        # define closure for line search
        def closure():
            self._optimizer.zero_grad()
            loss = self._oracle.func(x_k,
                                     num_repetitions=self._num_repetitions)
            return loss

        # two-loop recursion to compute search direction
        p = self._optimizer.two_loop_recursion(-grad_normed)
        options = {
            'closure': closure,
            'current_loss': f_k,
            'interpolate': False
        }
        if self._line_search == 'Wolfe':
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            f_k, d_k, lr = lbfg_opt[0], lbfg_opt[1], lbfg_opt[2]
        elif self._line_search == 'Armijo':
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            f_k, lr = lbfg_opt[0], lbfg_opt[1]
            d_k = -g_k
        elif self._line_search == 'None':
            # self._optimizer.param_groups[0]['lr'] = 1.
            d_k = -g_k
            lbfg_opt = self._optimizer.step(p, grad_normed, options=options)
            lr = lbfg_opt
        g_k = self._oracle.grad(x_k, num_repetitions=self._num_repetitions)
        grad_normed = g_k  # (g_k / g_k.norm())
        self._optimizer.curvature_update(grad_normed, eps=0.2, damping=False)
        self._lbfg_opt = lbfg_opt
        grad_norm = d_k.norm().item()
        self._x = x_k

        super()._post_step(init_time=init_time)

        if grad_norm < self._tolerance:
            return SUCCESS
        if not (torch.isfinite(x_k).all() and torch.isfinite(f_k).all()
                and torch.isfinite(d_k).all()):
            return COMP_ERROR

    def reverse_optimizer(self, **kwargs):
        self._optimizer.load_state_dict(self._state_dict)
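For reference, the per-step cycle in _step() above (evaluate the gradient, form a search direction with two_loop_recursion, take a line-search step, feed the new gradient back through curvature_update) follows the multi-batch L-BFGS interface of the imported LBFGS class, which appears to be the PyTorch-LBFGS package rather than torch.optim.LBFGS. The sketch below shows that cycle on a toy quadratic; the variable names and objective are illustrative only, and only the first element of the Wolfe step's return tuple (the new loss) is used, as in the Wolfe branch of _step().

import torch

x = torch.zeros(3, requires_grad=True)
target = torch.tensor([1.0, -2.0, 0.5])

def toy_loss():
    return ((x - target) ** 2).sum()

optimizer = LBFGS(params=[x], lr=0.5, history_size=5, line_search='Wolfe')

def closure():
    optimizer.zero_grad()
    return toy_loss()

# initial objective and gradient
obj = toy_loss()
obj.backward()
grad = x.grad.detach().clone()

for _ in range(20):
    # search direction from the current L-BFGS memory
    p = optimizer.two_loop_recursion(-grad)
    options = {'closure': closure, 'current_loss': obj, 'interpolate': False}
    obj = optimizer.step(p, grad, options=options)[0]
    # gradient at the new iterate, then update the curvature pairs
    optimizer.zero_grad()
    toy_loss().backward()
    grad = x.grad.detach().clone()
    optimizer.curvature_update(grad, eps=0.2, damping=False)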