def test_beta_jac(key):
    """Test that algorithms computing the Jacobian return the same Jacobian."""
    if key == 'svm':
        return True
    # select the alternative design matrix for this model
    if key in ("svm", "svr", "ssvr"):
        X_s = X_r
    else:
        X_s = X_c

    supp1, dense1, jac1 = compute_beta(
        X, y, dict_log_alpha[key], tol=tol, model=models[key])
    supp2, dense2, jac2 = get_bet_jac_implicit_forward(
        X, y, dict_log_alpha[key], tol=tol, model=models[key], tol_jac=tol)
    supp3, dense3, jac3 = compute_beta(
        X_s, y, dict_log_alpha[key], tol=tol, model=models[key])
    supp4, dense4, jac4 = get_bet_jac_implicit_forward(
        X_s, y, dict_log_alpha[key], tol=tol, model=models[key], tol_jac=tol)

    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-6)

    assert np.all(supp2 == supp3)
    assert np.allclose(dense2, dense3)
    assert np.allclose(jac2, jac3, atol=1e-6)

    assert np.all(supp3 == supp4)
    assert np.allclose(dense3, dense4)
    assert np.allclose(jac3, jac4, atol=1e-6)

    compute_beta_grad_implicit(
        X, y, dict_log_alpha[key], get_grad_outer, model=models[key])

def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
    """Get value of criterion.

    Parameters
    ----------
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array
        Logarithm of hyperparameter.
    monitor: instance of Monitor
        Monitor recording the values of the criterion.
    tol: float, optional (default=1e-3)
        Tolerance for the inner problem.
    """
    # Solve the inner problem on the training fold, warm-started with the
    # solution of the previous call.
    mask, dense, _ = compute_beta(
        X[self.idx_train], y[self.idx_train], log_alpha, model,
        mask0=self.mask0, dense0=self.dense0, tol=tol, compute_jac=False)
    # Evaluate the outer criterion on the validation fold.
    value_outer = self.get_val_outer(
        X[self.idx_val, :], y[self.idx_val], mask, dense)
    # Store the solution to warm start the next call.
    self.mask0 = mask
    self.dense0 = dense

    if monitor is not None:
        monitor(value_outer, None, alpha=np.exp(log_alpha))
    return value_outer

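# A minimal, self-contained sketch of what a held-out criterion like the one
# above computes, using scikit-learn's Lasso as the inner solver instead of
# compute_beta and taking the outer criterion to be the validation mean
# squared error. The function name and the choice of inner model are
# illustrative assumptions, not part of the sparse_ho API.
import numpy as np
from sklearn.linear_model import Lasso


def held_out_mse_sketch(X, y, idx_train, idx_val, log_alpha):
    """Fit on the training fold, return the MSE on the validation fold."""
    lasso = Lasso(alpha=np.exp(log_alpha), fit_intercept=False)
    lasso.fit(X[idx_train], y[idx_train])
    residuals = y[idx_val] - X[idx_val] @ lasso.coef_
    return np.mean(residuals ** 2)
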
def get_val(self, model, X, y, log_alpha, tol=1e-3):
    """Get value of criterion.

    Parameters
    ----------
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array
        Logarithm of hyperparameter.
    tol: float, optional (default=1e-3)
        Tolerance for the inner problem.
    """
    # TODO add maxiter param for all get_val
    # Solve the inner problem on the training fold.
    mask, dense, _ = compute_beta(
        X[self.idx_train], y[self.idx_train], log_alpha, model, tol=tol,
        compute_jac=False)
    # Evaluate the outer criterion on the validation fold.
    val = self.get_val_outer(X[self.idx_val], y[self.idx_val], mask, dense)
    return val

def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
    """Get value of criterion.

    Parameters
    ----------
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array
        Logarithm of hyperparameter.
    monitor: instance of Monitor
        Monitor recording the values of the criterion.
    tol: float, optional (default=1e-3)
        Tolerance for the inner problem.
    """
    if not self.init_delta_epsilon:
        self._init_delta_epsilon(X)
    # Solve the inner problem on the original observations.
    mask, dense, _ = compute_beta(
        X, y, log_alpha, model, tol=tol, mask0=self.mask0,
        dense0=self.dense0, compute_jac=False)
    # Solve the inner problem on the perturbed observations
    # y + epsilon * delta (finite-difference Monte Carlo).
    mask2, dense2, _ = compute_beta(
        X, y + self.epsilon * self.delta, log_alpha, model,
        mask0=self.mask02, dense0=self.dense02, tol=tol, compute_jac=False)

    self.mask0 = None
    self.dense0 = None
    self.mask02 = None
    self.dense02 = None

    val = self.get_val_outer(X, y, mask, dense, mask2, dense2)
    if monitor is not None:
        monitor(val, None, mask, dense, alpha=np.exp(log_alpha))
    return val

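# The two inner solves above are the ingredients of a finite-difference Monte
# Carlo estimate of the degrees of freedom, as used in SURE-type criteria:
# the model is refit on y + epsilon * delta with delta a fixed Gaussian
# vector, and the divergence of the prediction map is approximated by
# delta^T (y_hat(y + epsilon * delta) - y_hat(y)) / epsilon. Below is a
# standalone sketch of that estimate for the Lasso; it assumes the noise
# level sigma is known, and the names are illustrative, not the sparse_ho API.
import numpy as np
from sklearn.linear_model import Lasso


def sure_fdmc_sketch(X, y, log_alpha, sigma, epsilon, random_state=0):
    """Finite-difference Monte Carlo estimate of SURE for the Lasso."""
    n_samples = X.shape[0]
    delta = np.random.default_rng(random_state).standard_normal(n_samples)

    alpha = np.exp(log_alpha)
    beta = Lasso(alpha=alpha, fit_intercept=False).fit(X, y).coef_
    beta2 = Lasso(alpha=alpha, fit_intercept=False).fit(
        X, y + epsilon * delta).coef_

    # divergence (degrees of freedom) estimated by finite differences
    dof = delta @ (X @ beta2 - X @ beta) / epsilon
    # Stein unbiased risk estimate of the prediction error
    return (np.linalg.norm(y - X @ beta) ** 2 - n_samples * sigma ** 2
            + 2 * sigma ** 2 * dof)
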
def compute_beta_grad(
        self, X, y, log_alpha, model, get_grad_outer, mask0=None,
        dense0=None, quantity_to_warm_start=None, max_iter=1000, tol=1e-3,
        full_jac_v=False):
    """Compute beta and the hypergradient with backward differentiation of
    proximal coordinate descent.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array, shape (n_features,)
        Logarithm of hyperparameter.
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    get_grad_outer: callable
        Function which returns the gradient of the outer criterion.
    mask0: ndarray, shape (n_features,)
        Boolean of active features of the previous regression coefficients
        beta for warm start.
    dense0: ndarray, shape (mask.sum(),)
        Initial value of the previous regression coefficients beta for warm
        start.
    quantity_to_warm_start: ndarray
        Previous Jacobian of the inner optimization problem.
    max_iter: int
        Maximum number of iterations for the inner solver.
    tol: float
        The tolerance for the inner optimization problem.
    full_jac_v: bool
        TODO
    """
    # 1 compute the regression coefficients beta, keeping the sign of every
    # coordinate update for the backward pass
    mask, dense, list_sign = compute_beta(
        X, y, log_alpha, model, mask0=mask0, dense0=dense0, jac0=None,
        max_iter=max_iter, tol=tol, compute_jac=False, return_all=True,
        use_stop_crit=self.use_stop_crit)
    v = np.zeros(X.shape[1])
    v[mask] = get_grad_outer(mask, dense)
    # 2 compute the gradient in a backward way
    grad = get_grad_backward(
        X, np.exp(log_alpha), list_sign, v, model,
        jac_v0=quantity_to_warm_start)
    if not full_jac_v:
        grad = model.get_mask_jac_v(mask, grad)
    grad = np.atleast_1d(grad)
    return mask, dense, grad, grad

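# Standalone sketch of reverse-mode (backward) differentiation through cyclic
# proximal coordinate descent for the Lasso, under the assumed inner objective
# ||y - X b||^2 / (2 n) + alpha ||b||_1: the forward pass records the sign of
# each coordinate after each update, and the backward pass replays the updates
# in reverse, propagating the adjoint of the outer gradient and accumulating
# the hypergradient w.r.t. log(alpha). All names are illustrative; this is not
# the sparse_ho implementation.
import numpy as np


def lasso_cd_backward_sketch(X, y, log_alpha, v_outer, n_epochs=500):
    """Return beta and v_outer^T d beta / d log(alpha) by reverse mode."""
    n_samples, n_features = X.shape
    alpha = np.exp(log_alpha)
    lc = (X ** 2).sum(axis=0)  # squared column norms ||X_j||^2

    # forward pass: cyclic proximal coordinate descent, storing the signs
    beta = np.zeros(n_features)
    residual = y.copy()
    signs = []
    for _ in range(n_epochs):
        for j in range(n_features):
            old = beta[j]
            z = old + X[:, j] @ residual / lc[j]
            beta[j] = np.sign(z) * max(abs(z) - n_samples * alpha / lc[j], 0.)
            residual -= X[:, j] * (beta[j] - old)
            signs.append(np.sign(beta[j]))

    # backward pass: propagate the adjoint through the updates in reverse
    v = v_outer.copy()  # gradient of the outer criterion w.r.t. final beta
    grad = 0.
    for idx in range(n_epochs * n_features - 1, -1, -1):
        j = idx % n_features
        sign_j = signs[idx]
        if sign_j == 0.:
            # soft-thresholding was inactive: the update has zero derivative
            v[j] = 0.
        else:
            # derivative of the soft-thresholding step w.r.t. log(alpha)
            grad -= v[j] * sign_j * n_samples * alpha / lc[j]
            # adjoint of the coordinate update beta_j <- z_j - sign * thresh
            v -= v[j] * (X.T @ X[:, j]) / lc[j]
    return beta, grad
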
def get_bet_jac_implicit_forward(X, y, log_alpha, model, mask0=None,
                                 dense0=None, jac0=None, tol=1e-3,
                                 max_iter=1000, niter_jac=1000, tol_jac=1e-6,
                                 verbose=False, use_stop_crit=True):
    """Compute beta and its Jacobian with implicit forward differentiation.

    The inner problem is solved first; the Jacobian of beta with respect to
    log_alpha is then obtained by iterating its fixed-point equation on the
    support of the solution only.
    """
    # 1 solve the inner optimization problem
    mask, dense, _ = compute_beta(
        X, y, log_alpha, mask0=mask0, dense0=dense0, jac0=jac0, tol=tol,
        max_iter=max_iter, compute_jac=False, model=model, verbose=verbose,
        use_stop_crit=use_stop_crit)
    # 2 warm start the Jacobian and restrict the problem to the support
    dbeta0_new = model._init_dbeta0(mask, mask0, jac0)
    reduce_alpha = model._reduce_alpha(np.exp(log_alpha), mask)
    _, dual_var = model._init_beta_dual_var(X, y, mask, dense)
    # 3 iterate only the Jacobian equation until convergence
    jac = get_only_jac(
        model.reduce_X(X, mask), model.reduce_y(y, mask), dual_var,
        reduce_alpha, model.sign(dense, log_alpha), dbeta=dbeta0_new,
        niter_jac=niter_jac, tol_jac=tol_jac, model=model, mask=mask,
        dense=dense, verbose=verbose, use_stop_crit=use_stop_crit)
    return mask, dense, jac

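# Standalone sketch of the same idea for the Lasso: given the solution beta of
# the inner problem (e.g. obtained from compute_beta or scikit-learn), its
# Jacobian w.r.t. log(alpha) is obtained by running coordinate descent on the
# Jacobian fixed-point equation restricted to the support. The names below are
# illustrative assumptions, not the sparse_ho API.
import numpy as np


def lasso_jac_implicit_forward_sketch(X, y, beta, log_alpha, n_iter=100):
    """Iterate the Jacobian d beta / d log(alpha) on the support of beta."""
    n_samples = X.shape[0]
    alpha = np.exp(log_alpha)
    mask = beta != 0
    X_s = X[:, mask]
    sign = np.sign(beta[mask])
    lc = (X_s ** 2).sum(axis=0)  # squared column norms ||X_j||^2

    dbeta = np.zeros(mask.sum())       # Jacobian restricted to the support
    dresidual = np.zeros(n_samples)    # derivative of y - X beta
    for _ in range(n_iter):
        for j in range(mask.sum()):
            old = dbeta[j]
            # derivative of the soft-thresholded coordinate update
            dbeta[j] = (old + X_s[:, j] @ dresidual / lc[j]
                        - n_samples * alpha * sign[j] / lc[j])
            dresidual -= X_s[:, j] * (dbeta[j] - old)
    return mask, dbeta
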
def compute_beta_grad_implicit(
        X, y, log_alpha, get_grad_outer, mask0=None, dense0=None, tol=1e-3,
        model="lasso", max_iter=1000, sol_lin_sys=None, tol_lin_sys=1e-6,
        max_iter_lin_sys=100):
    """Compute beta and the hypergradient with implicit differentiation.

    The hypergradient computation is done in 3 steps:

    - 1 solve the inner optimization problem.
    - 2 solve a linear system on the support (i.e. the non-zero coefficients)
      of the solution.
    - 3 use the solution of the linear system to compute the gradient.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array, shape (n_features,)
        Logarithm of hyperparameter.
    get_grad_outer: callable
        Function which returns the gradient of the outer criterion.
    mask0: ndarray, shape (n_features,)
        Boolean of active features of the previous regression coefficients
        beta for warm start.
    dense0: ndarray, shape (mask.sum(),)
        Initial value of the previous regression coefficients beta for warm
        start.
    tol: float
        The tolerance for the inner optimization problem.
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    max_iter: int
        Maximum number of iterations for the inner solver.
    sol_lin_sys: ndarray
        Previous solution of the linear system for warm start.
    tol_lin_sys: float
        Tolerance for the resolution of the linear system.
    max_iter_lin_sys: int
        Maximum number of iterations for the resolution of the linear system.
    """
    # 1 compute the regression coefficients beta, stored in mask and dense
    alpha = np.exp(log_alpha)
    mask, dense, _ = compute_beta(
        X, y, log_alpha, mask0=mask0, dense0=dense0, tol=tol,
        max_iter=max_iter, compute_jac=False, model=model)
    n_features = X.shape[1]

    mat_to_inv = model.get_mat_vec(X, y, mask, dense, log_alpha)

    v = get_grad_outer(mask, dense)
    if hasattr(model, 'dual'):
        v = model.get_dual_v(mask, dense, X, y, v, log_alpha)

    # 2 solve the linear system
    # TODO I think this should be removed
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()

    if sol_lin_sys is not None and not hasattr(model, 'dual'):
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        sol0 = None  # TODO add warm start for SVM and SVR
    sol = cg(
        mat_to_inv, -model.generalized_supp(X, v, log_alpha),
        x0=sol0, tol=tol_lin_sys, maxiter=max_iter_lin_sys)
    sol_lin_sys = sol[0]

    # 3 compute the gradient
    grad = model._get_grad(X, y, sol_lin_sys, mask, dense, alphas, v)
    return mask, dense, grad, sol_lin_sys

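# Standalone sketch of the same three steps for the Lasso, using scikit-learn
# for step 1 and a conjugate gradient solve for step 2. The outer criterion is
# taken to be half the held-out mean squared error; the function name, solver
# choices and outer criterion are illustrative assumptions, not the sparse_ho
# API.
import numpy as np
from scipy.sparse.linalg import cg
from sklearn.linear_model import Lasso


def lasso_hypergrad_implicit_sketch(X_train, y_train, X_val, y_val, log_alpha):
    alpha = np.exp(log_alpha)
    n_train, n_val = X_train.shape[0], X_val.shape[0]

    # 1 solve the inner optimization problem
    lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
    lasso.fit(X_train, y_train)
    mask = lasso.coef_ != 0
    dense = lasso.coef_[mask]
    X_s = X_train[:, mask]

    # gradient of the outer criterion (half the validation MSE) on the support
    v = X_val[:, mask].T @ (X_val[:, mask] @ dense - y_val) / n_val

    # 2 solve the linear system on the support:
    # (X_s^T X_s / n_train) sol = v
    sol, _ = cg(X_s.T @ X_s / n_train, v)

    # 3 hypergradient w.r.t. log(alpha), using
    # d beta_s / d log(alpha) = -alpha (X_s^T X_s / n_train)^{-1} sign(beta_s)
    return -alpha * sol @ np.sign(dense)
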