Example #1
def _init_dbeta0(mask, mask0, jac0):
    """Warm start the Jacobian: reuse jac0 mapped onto the new support."""
    size_mat = mask.sum()
    if jac0 is not None:
        dbeta0_new = init_dbeta0_new(jac0, mask, mask0)
    else:
        dbeta0_new = np.zeros(size_mat)
    return dbeta0_new
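The helper init_dbeta0_new is not shown on this page. A minimal sketch of the mapping it presumably performs, inferred from how it is called here (an assumption, not the library's actual code): entries of jac0 whose features are active in both the old and the new support are copied over, and newly active features start at zero.

import numpy as np

def init_dbeta0_new_sketch(jac0, mask, mask0):
    # jac0 has one entry per feature active in mask0; build a vector
    # with one entry per feature active in mask (the new support).
    dbeta0_new = np.zeros(mask.sum())
    # mask0[mask]: for each new-support feature, was it active before?
    # mask[mask0]: for each old-support feature, is it still active?
    dbeta0_new[mask0[mask]] = jac0[mask[mask0]]
    return dbeta0_new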
Example #2
def get_beta_jac_t_v_implicit(
        X_train, y_train, log_alpha, get_v, mask0=None, dense0=None, tol=1e-3,
        model="lasso", sk=False, max_iter=1000, sol_lin_sys=None, n=1,
        sigma=0, delta=0, epsilon=0):
    alpha = np.exp(log_alpha)
    n_samples, n_features = X_train.shape

    mask, dense, _ = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, mask0=mask0, dense0=dense0,
        tol=tol, max_iter=max_iter, compute_jac=False, model=model)
    mat_to_inv = model.get_hessian(X_train, y_train, mask, dense, log_alpha)
    size_mat = mat_to_inv.shape[0]
    v = get_v(mask, dense)
    if hasattr(model, 'dual'):
        v = model.get_dual_v(X_train, y_train, v, log_alpha)
    # TODO: to clean
    is_sparse = issparse(X_train)
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()

    if sol_lin_sys is not None and not hasattr(model, 'dual'):
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        sol0 = np.zeros(size_mat)
    try:
        sol = cg(
            mat_to_inv, - model.generalized_supp(X_train, v, log_alpha),
            x0=sol0, tol=tol)
        if sol[1] == 0:
            sol_lin_sys = sol[0]
        else:
            raise ValueError('cg did not converge.')
    except Exception:
        print("Matrix to invert was badly conditioned")
        # regularize the badly conditioned system with a small ridge term
        if is_sparse:
            reg_amount = 1e-7 * norm(model.reduce_X(X_train, mask).todense(),
                                     ord=2) ** 2
            mat_to_inv += reg_amount * identity(size_mat)
        else:
            reg_amount = 1e-7 * norm(model.reduce_X(X_train, mask), ord=2) ** 2
            mat_to_inv += reg_amount * np.eye(size_mat)
        sol = cg(
            mat_to_inv, - model.generalized_supp(X_train, v, log_alpha),
            x0=sol0, atol=1e-3)

    sol_lin_sys = sol[0]

    jac_t_v = model._get_jac_t_v(
        X_train, y_train, sol_lin_sys, mask, dense, alphas, v.copy(),
        n_samples)
    return mask, dense, jac_t_v, sol[0]
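A hypothetical usage sketch for this variant (X_train, y_train, the model instance, the get_v callable, and alphas_grid are all assumptions, not part of this page): the returned support, coefficients, and linear-system solution are fed back as warm starts while sweeping the hyperparameter.

import numpy as np

mask, dense, sol = None, None, None
for log_alpha in np.log(alphas_grid):
    # warm start from the previous alpha on the path
    mask, dense, jac_t_v, sol = get_beta_jac_t_v_implicit(
        X_train, y_train, log_alpha, get_v,
        mask0=mask, dense0=dense, sol_lin_sys=sol, model=model)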
Example #3
def init_dbeta0_mcp(jac0, mask, mask0):
    """Warm start the MCP Jacobian: map each of its two columns onto the new support."""
    size_mat = mask.sum()
    dbeta0_new = np.zeros((size_mat, 2))
    for j in range(2):
        dbeta0_new[:, j] = init_dbeta0_new(jac0[:, j], mask, mask0)
    return dbeta0_new
Example #4
def get_beta_jac_t_v_implicit(
        X_train, y_train, log_alpha, X_val, y_val,
        mask0=None, dense0=None, jac0=None, tol=1e-3, model="lasso",
        sk=False, maxit=1000, sol_lin_sys=None, criterion="cv", n=1,
        sigma=0, delta=0, epsilon=0):
    alpha = np.exp(log_alpha)
    n_samples, n_features = X_train.shape
    # compute beta using sklearn lasso
    if sk:
        clf = Lasso(
            alpha=alpha, fit_intercept=False, warm_start=True, tol=tol,
            max_iter=10000)
        clf.fit(X_train, y_train)
        coef_ = clf.coef_
        mask = coef_ != 0
        dense = coef_[mask]
    # compute beta using vanilla numba cd lasso
    else:
        mask, dense = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha, mask0=mask0, dense0=dense0,
            maxit=maxit, tol=tol,
            compute_jac=False, jac0=None)

    if criterion == "cv":
        v = 2 * X_val[:, mask].T @ (
            X_val[:, mask] @ dense - y_val) / X_val.shape[0]
    elif criterion == "sure":
        if n == 1:
            v = 2 * X_train[:, mask].T @ (
                X_train[:, mask] @ dense -
                y_train - 2 * sigma ** 2 / epsilon * delta)
        elif n == 2:
            v = 2 * sigma ** 2 * X_train[:, mask].T @ delta / epsilon

    is_sparse = issparse(X_train)

    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()

    if sol_lin_sys is not None:
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        size_mat = mask.sum()
        sol0 = np.zeros(size_mat)

    mat_to_inv = X_train[:, mask].T @ X_train[:, mask]
    size_mat = mask.sum()

    if is_sparse:
        try:
            sol = cg(
                mat_to_inv, - n_samples * v,
                x0=sol0, tol=1e-15, maxiter=100000)
            if sol[1] == 0:
                jac = sol[0]
            else:
                raise ValueError('cg did not converge.')
        except Exception:
            print("Matrix to invert was badly conditioned")
            size_mat = mask.sum()
            reg_amount = 1e-7 * norm(X_train[:, mask].todense(), ord=2) ** 2
            sol = cg(
                mat_to_inv + reg_amount * identity(size_mat),
                - n_samples * v, x0=sol0,
                # - alpha * n_samples * v, x0=sol0,
                atol=1e-3)
            jac = sol[0]
    else:
        try:
            jac = solve(
                X_train[:, mask].T @ X_train[:, mask],
                - n_samples * v,
                assume_a='pos')
        except Exception:
            print("Matrix to invert was badly conditioned")
            reg_amount = 1e-9 * norm(X_train[:, mask], ord=2) ** 2
            jac = solve(
                X_train[:, mask].T @ X_train[:, mask] +
                reg_amount * np.eye(size_mat),
                - n_samples * v,
                assume_a='pos')

    if model == "lasso":
        jac_t_v = alpha * np.sign(dense) @ jac
    elif model == "wlasso":
        jac_t_v = np.zeros(n_features)
        jac_t_v[mask] = alphas[mask] * np.sign(dense) * jac

    return mask, dense, jac_t_v, jac
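Why the final step multiplies by alpha * np.sign(dense): assuming the usual Lasso scaling (1/(2 n)) ||X beta - y||^2 + alpha ||beta||_1 (an assumption; the objective is not stated on this page), implicit differentiation of the stationarity condition on the support S gives

\begin{aligned}
  & X_S^\top (X_S \hat\beta_S - y) + n\alpha \,\mathrm{sign}(\hat\beta_S) = 0
  && \text{(stationarity on the support)} \\
  & X_S^\top X_S \, \partial_{\log\alpha} \hat\beta_S
    = -\,n\alpha \,\mathrm{sign}(\hat\beta_S)
  && \text{(differentiate w.r.t. } \log\alpha\text{)} \\
  & v^\top \partial_{\log\alpha} \hat\beta_S
    = \alpha \,\mathrm{sign}(\hat\beta_S)^\top
      \underbrace{(X_S^\top X_S)^{-1} (-\,n\,v)}_{\texttt{jac}}
  && \text{(transpose trick)}
\end{aligned}

so a single linear solve with right-hand side -n v (exactly what cg and solve compute above) yields the hypergradient.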
Example #5
def get_beta_jac_fast_iterdiff(X,
                               y,
                               log_alpha,
                               X_val,
                               y_val,
                               mask0=None,
                               dense0=None,
                               jac0=None,
                               tol=1e-3,
                               maxit=100,
                               niter_jac=1000,
                               tol_jac=1e-6,
                               model="lasso",
                               criterion="cv",
                               sigma=1,
                               epsilon=0.1,
                               delta=None,
                               n=1):
    n_samples, n_features = X.shape

    if model == "mcp":
        mask, dense = get_beta_jac_iterdiff(X,
                                            y,
                                            log_alpha,
                                            mask0=mask0,
                                            dense0=dense0,
                                            jac0=jac0,
                                            tol=tol,
                                            maxit=maxit,
                                            compute_jac=False,
                                            model="mcp")
    else:
        mask, dense = get_beta_jac_iterdiff(X,
                                            y,
                                            log_alpha,
                                            mask0=mask0,
                                            dense0=dense0,
                                            jac0=jac0,
                                            tol=tol,
                                            maxit=maxit,
                                            compute_jac=False,
                                            model="lasso")

    # TODO this is dirty, to improve and to jit
    size_mat = mask.sum()
    if model == "lasso":
        if jac0 is not None:
            dbeta0_new = init_dbeta0_new(jac0, mask, mask0)
        else:
            dbeta0_new = np.zeros(size_mat)
    elif model == "mcp":
        # TODO add warm start
        if jac0 is None:
            dbeta0_new = np.zeros((size_mat, 2))
        else:
            dbeta0_new = init_dbeta0_mcp(jac0, mask, mask0)
    else:
        if jac0 is None:
            dbeta0_new = np.zeros((size_mat, size_mat))
        else:
            dbeta0_new = init_dbeta0_new_p(jac0, mask, mask0)

    if criterion == "cv":
        v = 2 * X_val[:, mask].T @ (X_val[:, mask] @ dense -
                                    y_val) / X_val.shape[0]
    elif criterion == "sure":
        if n == 1:
            v = 2 * X[:, mask].T @ (X[:, mask] @ dense - y -
                                    2 * sigma**2 / epsilon * delta)
        elif n == 2:
            v = 2 * sigma**2 * X[:, mask].T @ delta / epsilon
    jac = get_only_jac(X[:, mask],
                       np.exp(log_alpha),
                       np.sign(dense),
                       v,
                       dbeta=dbeta0_new,
                       niter_jac=niter_jac,
                       tol_jac=tol_jac,
                       model=model,
                       mask=mask,
                       dense=dense)

    return mask, dense, jac
Example #6
def compute_beta_grad_implicit(X,
                               y,
                               log_alpha,
                               get_grad_outer,
                               mask0=None,
                               dense0=None,
                               tol=1e-3,
                               model="lasso",
                               max_iter=1000,
                               sol_lin_sys=None,
                               tol_lin_sys=1e-6,
                               max_iter_lin_sys=100):
    """Compute beta and the hypergradient with implicit differentiation.

    The hypergradient computation is done in 3 steps:
    - 1 solve the inner optimization problem.
    - 2 solve a linear system on the support (i.e., the non-zero
      coefficients) of the solution.
    - 3 use the solution of the linear system to compute the gradient.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Design matrix.
    y: ndarray, shape (n_samples,)
        Observation vector.
    log_alpha: float or np.array, shape (n_features,)
        Logarithm of the hyperparameter.
    get_grad_outer: callable
        Function that computes the gradient of the outer criterion
        with respect to the regression coefficients beta.
    mask0: ndarray, shape (n_features,)
        Boolean mask of the active features of the previous regression
        coefficients beta, used for warm start.
    dense0: ndarray, shape (mask.sum(),)
        Values of the previous regression coefficients beta on the
        support, used for warm start.
    tol: float
        The tolerance for the inner optimization problem.
    model:  instance of ``sparse_ho.base.BaseModel``
        A model that follows the sparse_ho API.
    max_iter: int
        Maximum number of iterations for the inner solver.
    sol_lin_sys: ndarray
        Previous solution of the linear system for warm start.
    tol_lin_sys: float
        Tolerance for the resolution of the linear system.
    max_iter_lin_sys: int
        Maximum number of iterations for the resolution of the linear system.
    """

    # 1 compute the regression coefficients beta, stored in mask and dense
    alpha = np.exp(log_alpha)
    mask, dense, _ = compute_beta(X,
                                  y,
                                  log_alpha,
                                  mask0=mask0,
                                  dense0=dense0,
                                  tol=tol,
                                  max_iter=max_iter,
                                  compute_jac=False,
                                  model=model)
    n_features = X.shape[1]

    mat_to_inv = model.get_mat_vec(X, y, mask, dense, log_alpha)

    v = get_grad_outer(mask, dense)
    if hasattr(model, 'dual'):
        v = model.get_dual_v(mask, dense, X, y, v, log_alpha)

    # 2 solve the linear system
    # TODO I think this should be removed
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()
    if sol_lin_sys is not None and not hasattr(model, 'dual'):
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        sol0 = None  # TODO add warm start for SVM and SVR
    sol = cg(mat_to_inv,
             -model.generalized_supp(X, v, log_alpha),
             x0=sol0,
             tol=tol_lin_sys,
             maxiter=max_iter_lin_sys)
    sol_lin_sys = sol[0]

    # 3 compute the gradient
    grad = model._get_grad(X, y, sol_lin_sys, mask, dense, alphas, v)
    return mask, dense, grad, sol_lin_sys
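A hypothetical end-to-end usage sketch (the data X, y, X_val, y_val, the model instance, the step size, and the held-out criterion are all assumptions, not part of this page): the hypergradient returned by compute_beta_grad_implicit drives a plain gradient descent on log_alpha, with the support, coefficients, and linear-system solution reused as warm starts.

import numpy as np

def grad_outer(mask, dense):
    # gradient of the held-out MSE w.r.t. beta, restricted to the support
    return 2 * X_val[:, mask].T @ (
        X_val[:, mask] @ dense - y_val) / X_val.shape[0]

log_alpha = np.log(0.1)
mask, dense, sol = None, None, None
for _ in range(20):
    mask, dense, grad, sol = compute_beta_grad_implicit(
        X, y, log_alpha, grad_outer,
        mask0=mask, dense0=dense, sol_lin_sys=sol, model=model)
    log_alpha -= 0.5 * grad  # gradient step on the log-hyperparameter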