Example #1
def test_beta_jac(key):
    """Tests that algorithms computing the Jacobian return the same Jacobian"""
    if key == 'svm':
        # SVM is not checked further in this test
        return
    if key == "svm" or key == "svr" or key == "ssvr":
        X_s = X_r
    else:
        X_s = X_c
    supp1, dense1, jac1 = compute_beta(X,
                                       y,
                                       dict_log_alpha[key],
                                       tol=tol,
                                       model=models[key])
    supp2, dense2, jac2 = get_bet_jac_implicit_forward(X,
                                                       y,
                                                       dict_log_alpha[key],
                                                       tol=tol,
                                                       model=models[key],
                                                       tol_jac=tol)
    supp3, dense3, jac3 = compute_beta(X_s,
                                       y,
                                       dict_log_alpha[key],
                                       tol=tol,
                                       model=models[key])
    supp4, dense4, jac4 = get_bet_jac_implicit_forward(X_s,
                                                       y,
                                                       dict_log_alpha[key],
                                                       tol=tol,
                                                       model=models[key],
                                                       tol_jac=tol)

    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-6)

    assert np.all(supp2 == supp3)
    assert np.allclose(dense2, dense3)
    assert np.allclose(jac2, jac3, atol=1e-6)

    assert np.all(supp3 == supp4)
    assert np.allclose(dense3, dense4)
    assert np.allclose(jac3, jac4, atol=1e-6)

    compute_beta_grad_implicit(X,
                               y,
                               dict_log_alpha[key],
                               get_grad_outer,
                               model=models[key])
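
A possible way to drive this test over several models with pytest's parametrize; the key names and the module-level fixtures (X, y, X_r, X_c, dict_log_alpha, models, tol) are assumptions based on the snippet above, not part of the original test file.

import pytest

# Hypothetical parametrization; the exact keys depend on how
# dict_log_alpha and models are populated in the test module.
@pytest.mark.parametrize("key", ["lasso", "svm", "svr", "ssvr"])
def test_beta_jac_all_models(key):
    test_beta_jac(key)
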
Example #2
    def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
        """Get value of criterion.

        Parameters
        ----------
        model: instance of ``sparse_ho.base.BaseModel``
            A model that follows the sparse_ho API.
        X: array-like, shape (n_samples, n_features)
            Design matrix.
        y: ndarray, shape (n_samples,)
            Observation vector.
        log_alpha: float or np.array
            Logarithm of hyperparameter.
        monitor: instance of Monitor, optional (default=None)
            Monitor used to record the criterion values along the run.
        tol: float, optional (default=1e-3)
            Tolerance for the inner problem.
        """
        mask, dense, _ = compute_beta(X[self.idx_train],
                                      y[self.idx_train],
                                      log_alpha,
                                      model,
                                      mask0=self.mask0,
                                      dense0=self.dense0,
                                      tol=tol,
                                      compute_jac=False)
        value_outer = self.get_val_outer(X[self.idx_val, :], y[self.idx_val],
                                         mask, dense)

        self.mask0 = mask
        self.dense0 = dense

        if monitor is not None:
            monitor(value_outer, None, alpha=np.exp(log_alpha))
        return value_outer
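
A minimal usage sketch for this method; `criterion` and `model` are placeholders for a hold-out criterion instance and a sparse_ho model, and the Monitor import path is an assumption.

import numpy as np
from sparse_ho import Monitor  # assumed import path

monitor = Monitor()
alpha = 0.1
# Solve the inner problem on the training fold and evaluate the criterion
# on the validation fold, recording the value in the monitor.
value = criterion.get_val(model, X, y, np.log(alpha), monitor=monitor, tol=1e-4)
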
Example #3
    def get_val(self, model, X, y, log_alpha, tol=1e-3):
        """Get value of criterion.

        Parameters
        ----------
        model: instance of ``sparse_ho.base.BaseModel``
            A model that follows the sparse_ho API.
        X: array-like, shape (n_samples, n_features)
            Design matrix.
        y: ndarray, shape (n_samples,)
            Observation vector.
        log_alpha: float or np.array
            Logarithm of hyperparameter.
        tol: float, optional (default=1e-3)
            Tolerance for the inner problem.
        """
        # TODO add maxiter param for all get_val
        mask, dense, _ = compute_beta(X,
                                      y,
                                      log_alpha,
                                      model,
                                      tol=tol,
                                      compute_jac=False)
        val = self.get_val_outer(X[self.idx_val], y[self.idx_val], mask, dense)
        return val
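
Same call pattern without a monitor, under the same placeholder assumptions for `criterion` and `model`:

# Inner problem solved on the full X here; only the validation rows
# (criterion.idx_val) are used for the outer value.
val = criterion.get_val(model, X, y, np.log(0.05), tol=1e-4)
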
Example #4
    def get_val(self, model, X, y, log_alpha, monitor=None, tol=1e-3):
        """Get value of criterion.

        Parameters
        ----------
        model: instance of ``sparse_ho.base.BaseModel``
            A model that follows the sparse_ho API.
        X: array-like, shape (n_samples, n_features)
            Design matrix.
        y: ndarray, shape (n_samples,)
            Observation vector.
        log_alpha: float or np.array
            Logarithm of hyperparameter.
        monitor: instance of Monitor, optional (default=None)
            Monitor used to record the criterion values along the run.
        tol: float, optional (default=1e-3)
            Tolerance for the inner problem.
        """
        if not self.init_delta_epsilon:
            self._init_delta_epsilon(X)
        mask, dense, _ = compute_beta(X,
                                      y,
                                      log_alpha,
                                      model,
                                      tol=tol,
                                      mask0=self.mask0,
                                      dense0=self.dense0,
                                      compute_jac=False)
        mask2, dense2, _ = compute_beta(X,
                                        y + self.epsilon * self.delta,
                                        log_alpha,
                                        model,
                                        mask0=self.mask02,
                                        dense0=self.dense02,
                                        tol=tol,
                                        compute_jac=False)

        self.mask0 = None
        self.dense0 = None
        self.mask02 = None
        self.dense02 = None

        val = self.get_val_outer(X, y, mask, dense, mask2, dense2)
        if monitor is not None:
            monitor(val, None, mask, dense, alpha=np.exp(log_alpha))
        return val
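
A hedged call sketch for this perturbation-based criterion (two inner solves, one on y and one on y + epsilon * delta); `criterion` and `model` are again placeholders:

# The first call lazily initializes delta and epsilon from X
# (via _init_delta_epsilon) before the two inner solves.
val = criterion.get_val(model, X, y, np.log(0.1), monitor=None, tol=1e-4)
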
Example #5
    def compute_beta_grad(
            self, X, y, log_alpha, model, get_grad_outer, mask0=None,
            dense0=None, quantity_to_warm_start=None, max_iter=1000, tol=1e-3,
            full_jac_v=False):
        """Compute beta and hypergradient with backward differentiation of
        proximal coordinate descent.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Design matrix.
        y: ndarray, shape (n_samples,)
            Observation vector.
        log_alpha: float or np.array, shape (n_features,)
            Logarithm of hyperparameter.
        model: instance of ``sparse_ho.base.BaseModel``
            A model that follows the sparse_ho API.
        get_grad_outer: callable
            Function which returns the gradient of the outer criterion.
        mask0: ndarray, shape (n_features,)
            Boolean mask of the active features of the previous regression
            coefficients beta, used for warm start.
        dense0: ndarray, shape (mask.sum(),)
            Values of the previous regression coefficients beta on the
            support, used for warm start.
        quantity_to_warm_start: ndarray
            Previous Jacobian of the inner optimization problem.
        max_iter: int
            Maximum number of iterations for the inner solver.
        tol: float
            The tolerance for the inner optimization problem.
        full_jac_v: bool
            TODO
        """

        # 1 compute the regression coefficients beta
        mask, dense, list_sign = compute_beta(
            X, y, log_alpha, model, mask0=mask0, dense0=dense0,
            jac0=None, max_iter=max_iter, tol=tol,
            compute_jac=False, return_all=True,
            use_stop_crit=self.use_stop_crit)
        v = np.zeros(X.shape[1])
        v[mask] = get_grad_outer(mask, dense)
        # 2 compute the gradient in a backward way
        grad = get_grad_backward(
            X, np.exp(log_alpha), list_sign, v, model,
            jac_v0=quantity_to_warm_start)

        if not full_jac_v:
            grad = model.get_mask_jac_v(mask, grad)

        grad = np.atleast_1d(grad)
        return mask, dense, grad, grad
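
A usage sketch for this backward-differentiation routine; `algo` stands for an instance of the class defining the method, and `get_grad_outer` for a callable returning the outer gradient on the support, both assumptions here:

import numpy as np

mask, dense, grad, jac_v = algo.compute_beta_grad(
    X, y, np.log(0.1), model, get_grad_outer,
    max_iter=5000, tol=1e-6, full_jac_v=False)
# grad is the hypergradient; with full_jac_v=False it is restricted
# via model.get_mask_jac_v, as in the method body above.
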
Example #6
def get_bet_jac_implicit_forward(X,
                                 y,
                                 log_alpha,
                                 model,
                                 mask0=None,
                                 dense0=None,
                                 jac0=None,
                                 tol=1e-3,
                                 max_iter=1000,
                                 niter_jac=1000,
                                 tol_jac=1e-6,
                                 verbose=False,
                                 use_stop_crit=True):
    """Compute beta and its Jacobian with forward implicit differentiation.

    First solve the inner optimization problem to get the support and the
    values of beta, then iterate on the Jacobian of beta with respect to the
    hyperparameters, restricted to that support.
    """

    mask, dense, _ = compute_beta(X,
                                  y,
                                  log_alpha,
                                  mask0=mask0,
                                  dense0=dense0,
                                  jac0=jac0,
                                  tol=tol,
                                  max_iter=max_iter,
                                  compute_jac=False,
                                  model=model,
                                  verbose=verbose,
                                  use_stop_crit=use_stop_crit)
    dbeta0_new = model._init_dbeta0(mask, mask0, jac0)
    reduce_alpha = model._reduce_alpha(np.exp(log_alpha), mask)

    _, dual_var = model._init_beta_dual_var(X, y, mask, dense)
    jac = get_only_jac(model.reduce_X(X, mask),
                       model.reduce_y(y, mask),
                       dual_var,
                       reduce_alpha,
                       model.sign(dense, log_alpha),
                       dbeta=dbeta0_new,
                       niter_jac=niter_jac,
                       tol_jac=tol_jac,
                       model=model,
                       mask=mask,
                       dense=dense,
                       verbose=verbose,
                       use_stop_crit=use_stop_crit)

    return mask, dense, jac
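
A hedged call sketch mirroring its use in Example #1; `model` is assumed to follow the sparse_ho API:

mask, dense, jac = get_bet_jac_implicit_forward(
    X, y, np.log(0.1), model, tol=1e-6, tol_jac=1e-8)
# jac holds the Jacobian of beta (on its support) with respect to
# the hyperparameters, as computed by get_only_jac above.
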
Example #7
def compute_beta_grad_implicit(X,
                               y,
                               log_alpha,
                               get_grad_outer,
                               mask0=None,
                               dense0=None,
                               tol=1e-3,
                               model="lasso",
                               max_iter=1000,
                               sol_lin_sys=None,
                               tol_lin_sys=1e-6,
                               max_iter_lin_sys=100):
    """Compute beta and the hypergradient with implicit differentiation.

    The hypergradient computation is done in 3 steps:
    - 1 solve the inner optimization problem.
    - 2 solve a linear system on the support (i.e. the non-zero coefficients)
      of the solution.
    - 3 use the solution of the linear system to compute the gradient.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array, shape (n_features,)
        Logarithm of hyperparameter.
    get_grad_outer: callable
        Function which returns the gradient of the outer criterion.
    mask0: ndarray, shape (n_features,)
        Boolean mask of the active features of the previous regression
        coefficients beta, used for warm start.
    dense0: ndarray, shape (mask.sum(),)
        Values of the previous regression coefficients beta on the support,
        used for warm start.
    tol: float
        The tolerance for the inner optimization problem.
    model: instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    max_iter: int
        Maximum number of iterations for the inner solver.
    sol_lin_sys: ndarray
        Previous solution of the linear system for warm start.
    tol_lin_sys: float
        Tolerance for the resolution of the linear system.
    max_iter_lin_sys: int
        Maximum number of iterations for the resolution of the linear system.
    """

    # 1 compute the regression coefficients beta, stored in mask and dense
    alpha = np.exp(log_alpha)
    mask, dense, _ = compute_beta(X,
                                  y,
                                  log_alpha,
                                  mask0=mask0,
                                  dense0=dense0,
                                  tol=tol,
                                  max_iter=max_iter,
                                  compute_jac=False,
                                  model=model)
    n_features = X.shape[1]

    mat_to_inv = model.get_mat_vec(X, y, mask, dense, log_alpha)

    v = get_grad_outer(mask, dense)
    if hasattr(model, 'dual'):
        v = model.get_dual_v(mask, dense, X, y, v, log_alpha)

    # 2 solve the linear system
    # TODO I think this should be removed
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()
    if sol_lin_sys is not None and not hasattr(model, 'dual'):
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        sol0 = None  # TODO add warm start for SVM and SVR
    sol = cg(mat_to_inv,
             -model.generalized_supp(X, v, log_alpha),
             x0=sol0,
             tol=tol_lin_sys,
             maxiter=max_iter_lin_sys)
    sol_lin_sys = sol[0]

    # 3 compute the gradient
    grad = model._get_grad(X, y, sol_lin_sys, mask, dense, alphas, v)
    return mask, dense, grad, sol_lin_sys
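
A hedged sketch showing how the returned linear-system solution can warm start the next call, for instance across successive hyperparameter values; `model` and `get_grad_outer` are the same placeholders as above:

import numpy as np

sol = None
for log_alpha in np.log([0.1, 0.09, 0.08]):
    mask, dense, grad, sol = compute_beta_grad_implicit(
        X, y, log_alpha, get_grad_outer, model=model,
        sol_lin_sys=sol, tol=1e-6, tol_lin_sys=1e-8)
    # sol is reused as sol_lin_sys to warm start the conjugate
    # gradient solve of the next iteration.
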