Example #1
    def fit(self, X, y, train_idx=None, test_idx=None):
        # use the provided train/test split when given, otherwise draw one
        if train_idx is None or test_idx is None:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, shuffle=True, train_size=self.train_ratio)
        else:
            X_train, y_train = X[train_idx], y[train_idx]
            X_test, y_test = X[test_idx], y[test_idx]

        # record the held-out prediction error along the iterates
        def callback(w):
            return mean_squared_error(y_test, X_test @ w)

        w, thetas, mses = dual_primal(X_train,
                                      y_train,
                                      step=self.step,
                                      max_iter=self.max_iter,
                                      f_store=self.f_test,
                                      callback=callback,
                                      prox=self.prox,
                                      memory=self.memory,
                                      ret_all=False,
                                      verbose=self.verbose)

        self.coef_ = w
        self.mses = mses
        return self
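The fit method above reads several hyperparameters off self; here is a minimal sketch of a constructor that would support it. The class name, defaults, and the role of f_test are assumptions inferred only from the attribute accesses above, not shown in the source.

class DualPrimalRegressor:
    # Hypothetical enclosing class: the name and the default values below are
    # guesses; only the attribute names come from the fit method above.
    def __init__(self, step=1., max_iter=1000, f_test=10, prox=None,
                 memory=5, train_ratio=0.75, verbose=False):
        self.step = step                # step size passed to dual_primal
        self.max_iter = max_iter        # iteration budget for dual_primal
        self.f_test = f_test            # how often the callback is evaluated
        self.prox = prox                # proximal operator used by dual_primal
        self.memory = memory            # memory parameter of dual_primal
        self.train_ratio = train_ratio  # train fraction when no split is given
        self.verbose = verbose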
Example #2
def plot_varying_sigma(corr, density, snr, max_iter=100):
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    supp = np.random.choice(n_features, size=int(density * n_features),
                            replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1
    X_, y_, w_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.), n_features=n_features,
        w_true=w_true,
        corr=corr, snr=snr, random_state=0)

    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)

    print('Starting computation for this setting')

    ratio = 10 * datadriven_ratio(X, y)
    _, _, _, all_w = dual_primal(
        X, y, step_ratio=ratio, rho=0.99, ret_all=True,
        max_iter=max_iter,
        f_store=1)

    fig, axarr = plt.subplots(2, 2, sharey='row', sharex='col',
                              figsize=(4.2, 3.5), constrained_layout=True)

    scores = [f1_score(w != 0, w_true != 0) for w in all_w]
    mses = np.array([mean_squared_error(y_test, X_test @ w) for w in all_w])

    axarr[0, 0].plot(scores)
    axarr[1, 0].plot(mses / np.mean(y_test ** 2))

    axarr[0, 0].set_ylim(0, 1)
    axarr[0, 0].set_ylabel('F1 score')
    axarr[1, 0].set_ylabel("held-out pred. MSE")
    axarr[-1, 0].set_xlabel("CP iteration")
    axarr[0, 0].set_title('Iterative regularization')

    # last column: Lasso results
    # 50 log-spaced regularization strengths, from lambda_max down to lambda_max / 1000
    alphas = norm(X.T @ y, ord=np.inf) / len(y) * np.geomspace(1, 1e-3)

    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    axarr[0, 1].semilogx(
        alphas, [f1_score(coef != 0, w_true != 0) for coef in coefs])
    axarr[1, 1].semilogx(
        alphas,
        np.array([mean_squared_error(y_test, X_test @ coef) for coef in coefs])
        / np.mean(y_test ** 2))

    axarr[-1, 1].set_xlabel(r'$\lambda$')
    axarr[0, 1].set_title("Lasso path")

    axarr[0, 1].invert_xaxis()

    plt.show(block=False)
    return fig
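A hedged call sketch: the function relies on module-level n_samples and n_features, so those must exist before calling it; the values below are purely illustrative.

n_samples, n_features = 200, 500  # illustrative only, not from the source
fig = plot_varying_sigma(corr=0.5, density=0.1, snr=10, max_iter=100)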
Example #3
def wrapper(X, y, delta, max_iter, f_store, rep):
    print(delta, rep)
    np.random.seed(rep)
    noise = np.random.randn(y.shape[0])
    y_noise = y + delta * noise / norm(noise)
    all_w = dual_primal(X,
                        y_noise,
                        step=1,
                        max_iter=max_iter,
                        f_store=f_store,
                        verbose=False)[-1]
    return all_w
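Since wrapper seeds the noise with rep, it is presumably run over many repetitions and noise levels. A sketch of such a driver using joblib; the grid of deltas and the use of joblib are assumptions, the original driver is not shown.

from itertools import product

from joblib import Parallel, delayed

deltas = [0.1, 1., 10.]  # illustrative noise magnitudes, not from the source
all_runs = Parallel(n_jobs=-1)(
    delayed(wrapper)(X, y, delta, max_iter=100, f_store=1, rep=rep)
    for delta, rep in product(deltas, range(10)))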
Example #4
sigma_good = 1. / norm(X.T @ y, ord=np.inf)
# Chambolle-Pock step-size condition sigma * tau * L^2 <= rho with rho = 0.99;
# L is presumably norm(X, ord=2) (its definition is not shown in this snippet)
ratio_good = 0.99 / (L**2 * sigma_good**2)

ratios = [1, 100, ratio_good, 10000]
labels = [
    r"$\sigma=\tau$",
    f"$\\sigma = \\tau / {ratios[1]}$",
    "data-driven $\\sigma$",
    f"$\\sigma = \\tau / {ratios[3]}$",
]
all_w = dict()

# axarr is used below but never created in this snippet; a two-panel layout
# matching the plotting calls is assumed
fig, axarr = plt.subplots(2, 1, sharex=True, constrained_layout=True)

for ratio, label in zip(ratios, labels):
    all_w[ratio] = dual_primal(X,
                               y,
                               ret_all=True,
                               max_iter=60,
                               step_ratio=ratio,
                               f_store=1)[-1]
    f1_scores = [f1_score(w != 0, w_true != 0) for w in all_w[ratio]]
    supp_size = np.sum(all_w[ratio] != 0, axis=1)
    axarr[0].plot(f1_scores, label=label)
    axarr[1].plot(supp_size)

axarr[0].legend(ncol=4, loc='lower right')
axarr[0].set_ylim(0, 1)
axarr[0].set_ylabel('F1 score for support')
axarr[1].set_ylabel(r"$||w_k||_0$")
axarr[1].set_xlabel(r'Iterative regularization iteration')

plt.show(block=False)
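The data-driven choice follows from the Chambolle-Pock step-size condition sigma * tau * ||X||_2^2 <= rho: with tau = ratio * sigma and rho = 0.99, solving for ratio gives ratio = rho / (L^2 * sigma^2), which is exactly ratio_good above. A quick sanity check, assuming L = norm(X, ord=2):

# verify sigma * tau * L^2 == rho for the data-driven ratio
tau_good = ratio_good * sigma_good
assert sigma_good * tau_good * L ** 2 <= 0.99 + 1e-10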
Example #5
def plot_varying_sigma(corr, density, snr, steps, max_iter=100, rho=0.99):
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    supp = np.random.choice(n_features,
                            size=int(density * n_features),
                            replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1
    X_, y_, w_true = make_correlated_data(n_samples=int(n_samples * 4 / 3.),
                                          n_features=n_features,
                                          w_true=w_true,
                                          corr=corr,
                                          snr=snr,
                                          random_state=0)

    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)

    print('Starting computation for this setting')
    fig, axarr = plt.subplots(4,
                              2,
                              sharey='row',
                              sharex='col',
                              figsize=(7, 5),
                              constrained_layout=True)

    fig.suptitle(r"Correlation=%.1f, $||w^*||_0$= %s, snr=%s" %
                 (corr, (w_true != 0).sum(), snr))

    for step in steps:
        _, _, _, all_w = dual_primal(X,
                                     y,
                                     step=step,
                                     rho=rho,
                                     ret_all=True,
                                     max_iter=max_iter,
                                     f_store=1)
        scores = [f1_score(w != 0, w_true != 0) for w in all_w]
        supp_size = np.sum(all_w != 0, axis=1)
        mses = [mean_squared_error(y_test, X_test @ w) for w in all_w]

        axarr[0, 0].plot(scores, label=r"$\sigma=1 /%d ||X||$" % step)
        axarr[1, 0].semilogy(supp_size)
        axarr[2, 0].plot(norm(all_w - w_true, axis=1))
        axarr[3, 0].plot(mses)

    axarr[0, 0].set_ylim(0, 1)
    axarr[0, 0].set_ylabel('F1 score for support')
    axarr[1, 0].set_ylabel(r"$||w_k||_0$")
    axarr[2, 0].set_ylabel(r'$\Vert w_k - w^*\Vert$')
    axarr[2, 0].set_xlabel("CP iteration")
    axarr[3, 0].set_ylabel("held-out pred. MSE")
    axarr[0, 0].legend(loc='lower right', fontsize=10)
    axarr[0, 0].set_title('Iterative regularization')

    # last column: Lasso results
    # 50 log-spaced regularization strengths, from lambda_max down to lambda_max / 1000
    alphas = norm(X.T @ y, ord=np.inf) / len(y) * np.geomspace(1, 1e-3)

    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    axarr[0, 1].semilogx(alphas,
                         [f1_score(coef != 0, w_true != 0) for coef in coefs])
    axarr[1, 1].semilogx(alphas, [np.sum(coef != 0) for coef in coefs])
    axarr[2, 1].semilogx(alphas, [norm(coef - w_true) for coef in coefs])
    axarr[3, 1].semilogx(
        alphas, [mean_squared_error(y_test, X_test @ coef) for coef in coefs])

    axarr[3, 1].set_xlabel(r'$\lambda$')
    axarr[0, 1].set_title("Lasso path")

    for i in range(3):
        axarr[i, 1].set_xlim(*axarr[i, 1].get_xlim()[::-1])

    plt.show(block=False)
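A hedged call sketch for this variant; the argument values are illustrative and the actual experiment grid is not shown in the source.

# n_samples and n_features must be defined at module level before the call
plot_varying_sigma(corr=0.5, density=0.05, snr=10,
                   steps=[1, 10, 100], max_iter=100)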
fig, axarr = plt.subplots(2,
                          1,
                          sharex=True,
                          constrained_layout=True,
                          figsize=(7.15, 3))

sigma_good = 1. / np.sort(np.abs(X.T @ y))[int(0.99 * n_features)] / 2
step_good = 1 / (sigma_good * norm(X, ord=2))

steps = [1, 100, step_good]
labels = [r"$\sigma=\tau$", r"$\sigma \ll \tau$", "data-driven"]
all_w = dict()

for step, label in zip(steps, labels):
    all_w[step] = dual_primal(X,
                              y,
                              ret_all=True,
                              max_iter=100,
                              step=step,
                              f_store=1)[-1]
    f1_scores = [f1_score(w != 0, w_true != 0) for w in all_w[step]]
    supp_size = np.sum(all_w[step] != 0, axis=1)
    axarr[0].plot(f1_scores, label=label)
    axarr[1].plot(supp_size)

axarr[0].legend(ncol=3)
axarr[0].set_ylim(0, 1)
axarr[0].set_ylabel('F1 score for support')
axarr[1].set_ylabel(r"$||w_k||_0$")
axarr[1].set_xlabel(r'Chambolle-Pock iteration')

plt.show(block=False)