Example #1
0
def test_beta_jac_custom():
    """Check that a custom model reproduces the reference solver output.

    Runs ``get_beta_jac_fast_iterdiff`` with ``model`` and ``model_custom``
    and checks that the support, the dense coefficients and the Jacobian
    all agree.
    """
    supp, dense, jac = get_beta_jac_fast_iterdiff(
        X_train,
        y_train,
        np.array([log_alpha1, log_alpha2]),
        get_v,
        tol=tol,
        model=model,
        tol_jac=1e-16,
        max_iter=max_iter,
        niter_jac=10000)
    supp_custom, dense_custom, jac_custom = get_beta_jac_fast_iterdiff(
        X_train,
        y_train,
        np.array([log_alpha1, log_alpha2]),
        get_v,
        tol=tol,
        model=model_custom,
        tol_jac=1e-16,
        max_iter=max_iter,
        niter_jac=10000)

    # The original asserted the dense coefficients twice and never checked
    # the Jacobian; compare all three outputs, with exact equality on the
    # (boolean/index) support as the sibling tests do.
    assert np.all(supp == supp_custom)
    assert np.allclose(dense, dense_custom)
    assert np.allclose(jac, jac_custom)
Example #2
0
def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian compute the same sol
    # maybe we could add a test comparing with sklearn
    for key in models:
        log_alpha_key = dict_log_alpha[key]
        model_key = models[key]

        # forward iterative differentiation on the dense design matrix
        supp1, dense1, jac1 = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha_key, tol=tol, model=model_key)
        # NOTE(review): this call is identical to the previous one, so the
        # "sk" comparison below is trivially satisfied — confirm intent.
        supp1sk, dense1sk, jac1sk = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha_key, tol=tol, model=model_key)
        # implicit-forward differentiation on the dense design matrix
        supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
            X_train, y_train, log_alpha_key, get_v,
            tol=tol, model=model_key, tol_jac=tol)
        # the same two solvers, on the sparse design matrix
        supp3, dense3, jac3 = get_beta_jac_iterdiff(
            X_train_s, y_train, log_alpha_key, tol=tol, model=model_key)
        supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(
            X_train_s, y_train, log_alpha_key, get_v,
            tol=tol, model=model_key, tol_jac=tol)

        assert np.all(supp1 == supp1sk)
        assert np.all(supp1 == supp2)
        assert np.allclose(dense1, dense1sk)
        assert np.allclose(dense1, dense2)
        assert np.allclose(jac1, jac2)

        assert np.all(supp2 == supp3)
        assert np.allclose(dense2, dense3)
        assert np.allclose(jac2, jac3)

        assert np.all(supp3 == supp4)
        assert np.allclose(dense3, dense4)
        assert np.allclose(jac3, jac4)

        # smoke test: the implicit method should run without raising
        get_beta_jac_t_v_implicit(
            X_train, y_train, log_alpha_key, get_v, model=model_key)
Example #3
0
def test_beta_jac_custom_solver(model, model_custom):
    """The custom solver must reproduce the reference solver's output."""
    results = [
        get_beta_jac_fast_iterdiff(
            X_train, y_train, log_alpha, get_v,
            tol=tol, model=mdl, tol_jac=1e-12)
        for mdl in (model, model_custom)
    ]
    (supp, dense, jac), (supp_custom, dense_custom, jac_custom) = results

    assert np.all(supp == supp_custom)
    assert np.allclose(dense, dense_custom)
    assert np.allclose(jac, jac_custom)
Example #4
0
def test_beta_jac():
    """Compare both differentiation solvers against sklearn's ElasticNet."""
    log_alphas = np.array([log_alpha1, log_alpha2])

    # forward iterative differentiation
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_alphas, tol=tol, model=model,
        compute_jac=True, max_iter=max_iter)

    # reference coefficients from scikit-learn
    alpha_sum = alpha_1 + alpha_2
    estimator = linear_model.ElasticNet(
        alpha=alpha_sum,
        fit_intercept=False,
        l1_ratio=alpha_1 / alpha_sum,
        tol=1e-16,
        max_iter=max_iter)
    estimator.fit(X_train, y_train)

    # implicit-forward differentiation
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_alphas, get_v, tol=tol, model=model,
        tol_jac=1e-16, max_iter=max_iter, niter_jac=10000)

    assert np.allclose(dense1, estimator.coef_[estimator.coef_ != 0])
    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
Example #5
0
def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian compute the same sol
    # maybe we could add a test comparing with sklearn
    for model in models:
        la = dict_log_alpha[model]

        # forward vs implicit-forward on the dense design matrix
        supp1, dense1, jac1 = get_beta_jac_iterdiff(
            X_train, y_train, la, tol=tol, model=model)
        supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
            X_train, y_train, la, X_test, y_test,
            tol=tol, model=model, tol_jac=tol)
        # the same pair, on the sparse design matrix
        supp3, dense3, jac3 = get_beta_jac_iterdiff(
            X_train_s, y_train, la, tol=tol, model=model)
        supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(
            X_train_s, y_train, la, X_test, y_test,
            tol=tol, model=model, tol_jac=tol)

        # chain of pairwise comparisons: 1~2, 2~3, 3~4
        for (s_a, d_a, j_a), (s_b, d_b, j_b) in (
                ((supp1, dense1, jac1), (supp2, dense2, jac2)),
                ((supp2, dense2, jac2), (supp3, dense3, jac3)),
                ((supp3, dense3, jac3), (supp4, dense4, jac4))):
            assert np.all(s_a == s_b)
            assert np.allclose(d_a, d_b)
            assert np.allclose(j_a, j_b)
Example #6
0
def test_beta_jac(model):
    """Compare the differentiation solvers with sklearn's L1 logistic fit."""
    # forward iterative differentiation on the dense design matrix
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, tol=tol,
        model=model, compute_jac=True, max_iter=1000)

    # scikit-learn reference solution
    clf = LogisticRegression(
        penalty="l1", tol=1e-12, C=(1 / (alpha * n_samples)),
        fit_intercept=False, max_iter=100000, solver="saga")
    clf.fit(X_train, y_train)
    supp_sk = clf.coef_ != 0
    dense_sk = clf.coef_[supp_sk]

    # implicit-forward differentiation on the dense design matrix
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_alpha,
        get_v, tol=tol, model=model, tol_jac=1e-12)

    # NOTE(review): identical to the first call (dense X_train), while the
    # fast solver below uses X_train_s — confirm whether X_train_s was meant.
    supp3, dense3, jac3 = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, tol=tol,
        model=model, compute_jac=True, max_iter=1000)

    # implicit-forward differentiation on the sparse design matrix
    supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(
        X_train_s, y_train, log_alpha,
        get_v, tol=tol, model=model, tol_jac=1e-12)

    assert np.all(supp1 == supp_sk)
    assert np.allclose(dense1, dense_sk, atol=1e-4)

    # chain of pairwise comparisons: 1~2, 2~3, 3~4
    for (s_a, d_a, j_a), (s_b, d_b, j_b) in (
            ((supp1, dense1, jac1), (supp2, dense2, jac2)),
            ((supp2, dense2, jac2), (supp3, dense3, jac3)),
            ((supp3, dense3, jac3), (supp4, dense4, jac4))):
        assert np.all(s_a == s_b)
        assert np.allclose(d_a, d_b)
        assert np.allclose(j_a, j_b, atol=1e-4)
Example #7
0
def test_beta_jac2():
    #########################################################################
    # check that the methods computing the full Jacobian compute the same sol
    # maybe we could add a test comparing with sklearn
    for key in models:
        # run the fast solver twice on the sparse design with identical
        # settings; results must be reproducible
        runs = [
            get_beta_jac_fast_iterdiff(
                X_train_s, y_train, dict_log_alpha[key], get_v,
                tol=tol, model=models[key], tol_jac=tol)
            for _ in range(2)
        ]
        (supp, dense, jac), (supp_custom, dense_custom, jac_custom) = runs

        assert np.all(supp == supp_custom)
        assert np.allclose(dense, dense_custom)
        assert np.allclose(jac, jac_custom)
Example #8
0
def test_beta_jac(model):
    """Check the solver's Jacobian against a closed-form linear solve and
    its primal solution against scikit-learn's LinearSVC (hinge loss)."""
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_C,
                                                tol=tol,
                                                model=model,
                                                compute_jac=True,
                                                max_iter=10000)

    # Rebuild the full coefficient vector and split its coordinates into the
    # strictly-inside-the-box ones (0 < beta < C) and the clipped ones.
    beta = np.zeros(n_samples)
    beta[supp1] = dense1
    full_supp = np.logical_and(beta > 0, beta < C)
    # full_supp = np.logical_or(beta <= 0, beta >= C)

    # Gram matrix of the label-signed samples: Q[i, j] = y_i y_j <x_i, x_j>
    Q = (y_train[:, np.newaxis] * X_train) @ (y_train[:, np.newaxis] *
                                              X_train).T
    # Right-hand side restricted to the free set, contributed by the
    # coordinates saturated at the upper bound C.
    v = (np.eye(n_samples, n_samples) - Q)[np.ix_(
        full_supp, beta >= C)] @ (np.ones((beta >= C).sum()) * C)

    # Closed-form Jacobian on the free set must match the iterative one
    # (jac1 restricted to the non-saturated coefficients).
    jac_dense = np.linalg.solve(Q[np.ix_(full_supp, full_supp)], v)
    assert np.allclose(jac_dense, jac1[dense1 < C])

    # Recover the primal coefficients w = sum_i y_i beta_i x_i; the sparse
    # and dense design matrices need different numpy/scipy code paths.
    if issparse(model.X):
        primal = np.sum(X_train_s[supp1, :].T.multiply(y_train[supp1] *
                                                       dense1),
                        axis=1)
        primal = primal.T
    else:
        primal = np.sum(y_train[supp1] * dense1 * X_train[supp1, :].T, axis=1)
    clf = LinearSVC(loss="hinge",
                    fit_intercept=False,
                    C=C,
                    tol=tol,
                    max_iter=100000)
    clf.fit(X_train, y_train)
    # Implicit-forward differentiation must agree with the forward solver.
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(X_train,
                                                     y_train,
                                                     log_C,
                                                     get_v,
                                                     tol=tol,
                                                     model=model,
                                                     tol_jac=1e-16,
                                                     max_iter=10000)
    assert np.allclose(primal, clf.coef_)

    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-4)
Example #9
0
def get_val_grad(X_train,
                 y_train,
                 log_alpha,
                 X_val,
                 y_val,
                 X_test,
                 y_test,
                 tol,
                 monitor,
                 warm_start,
                 method="implicit",
                 maxit=1000,
                 niter_jac=1000,
                 model="lasso",
                 tol_jac=1e-3,
                 convexify=False,
                 gamma=1e-2,
                 criterion="cv",
                 C=2.0,
                 gamma_sure=0.3,
                 sigma=1,
                 random_state=42,
                 beta_star=None):
    """Compute the value of the outer criterion and its hypergradient.

    Parameters
    --------------
    X_train: np.array, shape (n_samples, n_features)
        observation used for training
    y_train: np.array, shape (n_samples, n_features)
        targets used for training
    log_alpha: float
        log of the regularization coefficient alpha
    X_val: np.array, shape (n_samples, n_features)
        observation used for cross-validation
    y_val: np.array, shape (n_samples, n_features)
        targets used for cross-validation
    X_test: np.array, shape (n_samples, n_features)
        observation used for testing
    y_test: np.array, shape (n_samples, n_features)
        targets used for testing
    tol : float
        tolerance for the inner optimization solver
    monitor: Monitor object
        used to store the value of the cross-validation function
    warm_start: WarmStart object
        used for warm start for all methods
    method: string
        method used to compute the hypergradient, you may want to use
        "implicit" "forward" "backward" "fast_forward_iterdiff"
    maxit: int
        maximum number of iterations in the inner optimization solver
    niter_jac: int
        maximum number of iteration for the fast_forward_iterdiff
        method in the Jacobian computation
    model: string
        model used, "lasso", "wlasso", "mcp"
    tol_jac: float
        tolerance for the Jacobian loop
    convexify: bool
        True if you want to regularize the problem
    gamma: non negative float
        convexification coefficient
    criterion: string
        criterion to optimize during hyperparameter optimization
        you may choose between "cv" and "sure"
    C: float
        constant for sure problem
    gamma_sure: float
        constant for sure problem
    sigma: float
        constant for sure problem
    random_state: int
    beta_star: np.array, shape (n_features,)
        True coefficients of the underlying model (if known)
        used to compute metrics

    Returns
    -------
    val : float
        value of the criterion (returned alone when ``method == "hyperopt"``)
    grad : np.array
        hypergradient of the criterion w.r.t. ``log_alpha`` (only when
        ``method != "hyperopt"``)
    """

    n_samples, n_features = X_train.shape

    # warm start for cross validation loss and sure
    mask0, dense0, jac0, mask20, dense20, jac20 = (warm_start.mask_old,
                                                   warm_start.beta_old,
                                                   warm_start.dbeta_old,
                                                   warm_start.mask_old2,
                                                   warm_start.beta_old2,
                                                   warm_start.dbeta_old2)

    if criterion == "cv":
        # the second solve (mask2/dense2/jac2) is only needed for "sure"
        mask2 = None
        dense2 = None
        jac2 = None
        rmse = None
        if method == "implicit":
            sol_lin_sys = warm_start.sol_lin_sys
            mask, dense, jac, sol_lin_sys = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask0,
                dense0=dense0,
                jac0=jac0,
                tol=tol,
                model=model)
            warm_start.set_sol_lin_sys(sol_lin_sys)
        elif method == "forward":
            mask, dense, jac = get_beta_jac_iterdiff(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit,
                                                     model=model)

        elif method == "implicit_forward":
            mask, dense, jac = get_beta_jac_fast_iterdiff(X_train,
                                                          y_train,
                                                          log_alpha,
                                                          X_val,
                                                          y_val,
                                                          mask0,
                                                          dense0,
                                                          jac0,
                                                          tol,
                                                          maxit,
                                                          niter_jac=niter_jac,
                                                          tol_jac=tol_jac,
                                                          model=model)
        elif method == "backward":
            mask, dense, jac = get_beta_jac_backward(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     X_val,
                                                     y_val,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit)
        elif method == "hyperopt":
            # zero-order method: no Jacobian needed
            mask, dense = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_alpha,
                                                mask0=mask0,
                                                dense0=dense0,
                                                tol=tol,
                                                maxit=maxit,
                                                model=model,
                                                compute_jac=False)
            jac = None
        else:
            raise ValueError('No method called %s' % method)
        # value of the objective function on the validation loss
        if convexify:
            val = norm(y_val - X_val[:, mask] @ dense)**2 / X_val.shape[0]
            val += gamma * np.sum(np.exp(log_alpha)**2)
        else:
            val = norm(y_val - X_val[:, mask] @ dense)**2 / X_val.shape[0]
        # value of the objective function on the test loss
        val_test = norm(y_test - X_test[:, mask] @ dense)**2 / X_test.shape[0]

        if method in ("implicit", "backward", "hyperopt"):
            # these methods already return the (transposed) hypergradient
            grad = jac
        else:
            # chain rule: d val / d log_alpha = jac^T @ d val / d beta
            if model in ("lasso", "mcp"):
                grad = 2 * jac.T @ (X_val[:, mask].T @ (
                    X_val[:, mask] @ dense - y_val)) / X_val.shape[0]

            elif model == "wlasso":
                grad = np.zeros(n_features)
                grad[mask] = 2 * jac.T @ (X_val[:, mask].T @ (
                    X_val[:, mask] @ dense - y_val)) / X_val.shape[0]
                if convexify:
                    grad += gamma * np.exp(log_alpha)
    elif criterion == "sure":
        val_test = 0
        epsilon = C * sigma / (n_samples)**gamma_sure
        # TODO properly
        rng = np.random.RandomState(random_state)
        delta = rng.randn(n_samples)  # sample random noise for MCMC step
        y_train2 = y_train + epsilon * delta
        if method == "implicit":
            # TODO
            sol_lin_sys = warm_start.sol_lin_sys
            mask, dense, jac, sol_lin_sys = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask0,
                dense0=dense0,
                jac0=jac0,
                tol=tol,
                model=model,
                criterion="sure",
                n=1,
                sol_lin_sys=sol_lin_sys,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta)
            sol_lin_sys2 = warm_start.sol_lin_sys2
            mask2, dense2, jac2, sol_lin_sys2 = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask20,
                dense0=dense20,
                jac0=jac20,
                tol=tol,
                model=model,
                criterion="sure",
                n=2,
                sol_lin_sys=sol_lin_sys2,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta)
            warm_start.set_sol_lin_sys(sol_lin_sys)
            # 1 / 0
        elif method == "forward":
            mask, dense, jac = get_beta_jac_iterdiff(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit,
                                                     model=model)
            mask2, dense2, jac2 = get_beta_jac_iterdiff(X_train,
                                                        y_train2,
                                                        log_alpha,
                                                        mask0=mask20,
                                                        dense0=dense20,
                                                        jac0=jac20,
                                                        tol=tol,
                                                        maxit=maxit,
                                                        model=model)
        elif method == "implicit_forward":
            # TODO modify
            mask, dense, jac = get_beta_jac_fast_iterdiff(X_train,
                                                          y_train,
                                                          log_alpha,
                                                          None,
                                                          None,
                                                          mask0,
                                                          dense0,
                                                          jac0,
                                                          tol,
                                                          maxit,
                                                          criterion="sure",
                                                          niter_jac=niter_jac,
                                                          tol_jac=tol_jac,
                                                          model=model,
                                                          sigma=sigma,
                                                          epsilon=epsilon,
                                                          delta=delta,
                                                          n=1)
            mask2, dense2, jac2 = get_beta_jac_fast_iterdiff(
                X_train,
                y_train2,
                log_alpha,
                X_val,
                y_val,
                mask20,
                dense20,
                jac20,
                tol,
                maxit,
                criterion="sure",
                niter_jac=niter_jac,
                tol_jac=tol_jac,
                model=model,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta,
                n=2)
        elif method == "backward":
            mask, dense, jac = get_beta_jac_backward(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     X_val,
                                                     y_val,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit)
            # BUGFIX: the warm-start variables are named mask20/dense20/jac20
            # (see the unpacking at the top); the original referenced the
            # undefined names mask02/dense02/jac02 and raised a NameError.
            mask2, dense2, jac2 = get_beta_jac_backward(X_train,
                                                        y_train2,
                                                        log_alpha,
                                                        X_val,
                                                        y_val,
                                                        mask0=mask20,
                                                        dense0=dense20,
                                                        jac0=jac20,
                                                        tol=tol,
                                                        maxit=maxit)
        elif method == "hyperopt":
            mask, dense = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_alpha,
                                                mask0=mask0,
                                                dense0=dense0,
                                                tol=tol,
                                                maxit=maxit,
                                                model=model,
                                                compute_jac=False)
            mask2, dense2 = get_beta_jac_iterdiff(X_train,
                                                  y_train2,
                                                  log_alpha,
                                                  mask0=mask20,
                                                  dense0=dense20,
                                                  tol=tol,
                                                  maxit=maxit,
                                                  model=model,
                                                  compute_jac=False)
            jac, jac2 = None, None

        # compute the degree of freedom
        dof = (X_train[:, mask2] @ dense2 - X_train[:, mask] @ dense) @ delta
        dof /= epsilon
        # compute the value of the sure
        val = norm(y_train - X_train[:, mask] @ dense)**2
        val -= n_samples * sigma**2
        val += 2 * sigma**2 * dof
        if convexify:
            val += gamma * np.sum(np.exp(log_alpha)**2)

        if beta_star is not None:
            # distance to the known ground-truth coefficients
            diff_beta = beta_star.copy()
            diff_beta[mask] -= dense
            rmse = norm(diff_beta)
        else:
            rmse = None

        if method == "hyperopt":
            monitor(val, None, log_alpha, rmse=rmse)
            return val
        if method in ("implicit", "backward", "hyperopt"):
            grad = jac
        elif model == "lasso":
            grad = 2 * jac.T @ X_train[:, mask].T @ (
                X_train[:, mask] @ dense - y_train -
                delta * sigma**2 / epsilon)
            grad += (2 * sigma**2 * jac2.T @ X_train[:, mask2].T @ delta /
                     epsilon)
        elif model == "wlasso":
            grad = np.zeros(n_features)
            grad[mask] = 2 * jac.T @ X_train[:, mask].T @ (
                X_train[:, mask] @ dense - y_train -
                delta * sigma**2 / epsilon)
            grad[mask2] += (2 * sigma**2 *
                            jac2.T @ X_train[:, mask2].T @ delta / epsilon)
            if convexify:
                grad += gamma * np.exp(log_alpha)

    warm_start(mask, dense, jac, mask2, dense2, jac2)
    if model == "lasso":
        monitor(val, val_test, log_alpha, grad, rmse=rmse)
    elif model in ("mcp", "wlasso"):
        monitor(val, val_test, log_alpha.copy(), grad)
    else:
        monitor(val, val_test, log_alpha.copy(), rmse=rmse)
    if method == "hyperopt":
        return val
    else:
        return val, np.array(grad)