def fit(self, X, y, train_idx=None, test_idx=None):
    """Run ``dual_primal`` on a training split and record held-out errors.

    Parameters
    ----------
    X, y :
        Data and targets. Both are indexed with ``train_idx``/``test_idx``
        when those are given; ``X`` rows are multiplied with ``@``.
    train_idx, test_idx : optional
        Explicit train/test indices. If either one is ``None`` a shuffled
        split is drawn with ``train_size=self.train_ratio`` instead.

    Returns
    -------
    self
        With ``coef_`` set to the first value returned by ``dual_primal``
        and ``mses`` set to the third one.
    """
    has_explicit_split = train_idx is not None and test_idx is not None
    if has_explicit_split:
        X_fit, y_fit = X[train_idx], y[train_idx]
        X_held, y_held = X[test_idx], y[test_idx]
    else:
        X_fit, X_held, y_fit, y_held = train_test_split(
            X, y, shuffle=True, train_size=self.train_ratio)

    def held_out_mse(w):
        # Prediction error of coefficient vector ``w`` on the held-out split.
        return mean_squared_error(y_held, X_held @ w)

    # The second returned value is not used by this method.
    coef, _, held_out_errors = dual_primal(
        X_fit,
        y_fit,
        step=self.step,
        max_iter=self.max_iter,
        f_store=self.f_test,
        callback=held_out_mse,
        prox=self.prox,
        memory=self.memory,
        ret_all=False,
        verbose=self.verbose,
    )

    self.coef_ = coef
    self.mses = held_out_errors
    return self
def plot_varying_sigma(corr, density, snr, max_iter=100):
    """Plot iterative-regularization iterates next to a Lasso path.

    Builds a 2x2 figure: the left column shows the F1 score of the
    recovered support and the normalized held-out MSE for every iterate
    returned by ``dual_primal``; the right column shows the same two
    quantities along a Lasso path computed with ``celer_path``.

    Parameters
    ----------
    corr, snr :
        Forwarded to ``make_correlated_data``.
    density :
        Fraction of the ``n_features`` coefficients set to 1 in the initial
        ground-truth vector.
    max_iter : int
        Forwarded to ``dual_primal``.

    Returns
    -------
    fig
        The created matplotlib figure.

    Notes
    -----
    ``n_features`` and ``n_samples`` are read from the enclosing scope.
    """
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    n_active = int(density * n_features)
    supp = np.random.choice(n_features, size=n_active, replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1

    # Generate 4/3 * n_samples rows; a quarter of them is held out below.
    X_, y_, w_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.),
        n_features=n_features,
        w_true=w_true,
        corr=corr,
        snr=snr,
        random_state=0,
    )
    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)
    print('Starting computation for this setting')

    ratio = 10 * datadriven_ratio(X, y)
    _, _, _, all_w = dual_primal(
        X, y, step_ratio=ratio, rho=0.99, ret_all=True, max_iter=max_iter,
        f_store=1)

    fig, axarr = plt.subplots(
        2, 2, sharey='row', sharex='col', figsize=(4.2, 3.5),
        constrained_layout=True)
    (ax_f1_iter, ax_f1_lasso), (ax_mse_iter, ax_mse_lasso) = axarr

    true_support = w_true != 0
    # Normalizer for the held-out MSE curves of both columns.
    test_energy = np.mean(y_test ** 2)

    scores = [f1_score(w != 0, true_support) for w in all_w]
    mses = np.array([mean_squared_error(y_test, X_test @ w) for w in all_w])

    ax_f1_iter.plot(scores)
    ax_mse_iter.plot(mses / test_energy)

    ax_f1_iter.set_ylim(0, 1)
    ax_f1_iter.set_ylabel('F1 score')
    ax_mse_iter.set_ylabel("pred MSE left out")
    ax_mse_iter.set_xlabel("CP iteration")
    ax_f1_iter.set_title('Iterative regularization')

    # last column: Lasso results
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alphas = alpha_max * np.geomspace(1, 1e-3)
    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T

    lasso_scores = [f1_score(coef != 0, true_support) for coef in coefs]
    ax_f1_lasso.semilogx(alphas, lasso_scores)

    lasso_mses = np.array(
        [mean_squared_error(y_test, X_test @ coef) for coef in coefs])
    ax_mse_lasso.semilogx(alphas, lasso_mses / test_energy)

    ax_mse_lasso.set_xlabel(r'$\lambda$')
    ax_f1_lasso.set_title("Lasso path")
    ax_f1_lasso.invert_xaxis()

    plt.show(block=False)
    return fig
def wrapper(X, y, delta, max_iter, f_store, rep):
    """Run ``dual_primal`` on targets perturbed by seeded Gaussian noise.

    Parameters
    ----------
    X, y :
        Data passed to ``dual_primal``; ``y`` is perturbed first.
    delta :
        Scale applied to the unit-normalized noise vector.
    max_iter, f_store :
        Forwarded to ``dual_primal``.
    rep : int
        Repetition index, used as the NumPy random seed.

    Returns
    -------
    The last element of the value returned by ``dual_primal``.
    """
    print(delta, rep)

    # Seed with the repetition index so each repetition is reproducible.
    np.random.seed(rep)
    noise = np.random.randn(y.shape[0])

    # The noise is divided by its norm, so ``delta`` sets its magnitude.
    y_noise = y + delta * noise / norm(noise)

    solver_output = dual_primal(
        X, y_noise, step=1, max_iter=max_iter, f_store=f_store,
        verbose=False)
    return solver_output[-1]
# Compare support recovery of dual_primal for several step ratios: two fixed
# values on each side and one derived from the data (``ratio_good``).
# ``X``, ``y``, ``L``, ``w_true`` and ``axarr`` come from earlier in the script.
sigma_good = 1. / norm(X.T @ y, ord=np.inf)
ratio_good = 0.99 / (L**2 * sigma_good**2)

ratios = [1, 100, ratio_good, 10000]
labels = [
    r"$\sigma=\tau$",
    f"$\\sigma = \\tau / {ratios[1]}$",
    "data-driven $\\sigma$",
    f"$\\sigma = \\tau / {ratios[3]}$",
]

# Maps each ratio to the last element returned by dual_primal for it.
all_w = {}
for ratio, label in zip(ratios, labels):
    all_w[ratio] = dual_primal(
        X, y, ret_all=True, max_iter=60, step_ratio=ratio, f_store=1)[-1]

    # Top panel: F1 score of each stored iterate's support vs. the truth.
    f1_scores = [f1_score(w != 0, w_true != 0) for w in all_w[ratio]]
    axarr[0].plot(f1_scores, label=label)

    # Bottom panel: number of nonzero coefficients per stored iterate.
    supp_size = (all_w[ratio] != 0).sum(axis=1)
    axarr[1].plot(supp_size)

axarr[0].legend(ncol=4, loc='lower right')
axarr[0].set_ylim(0, 1)
axarr[0].set_ylabel('F1 score for support')
axarr[1].set_ylabel(r"$||x_k||_0$")
axarr[1].set_xlabel(r'Iterative regularization iteration')
plt.show(block=False)
def plot_varying_sigma(corr, density, snr, steps, max_iter=100, rho=0.99):
    """Plot ``dual_primal`` iterates for several steps next to a Lasso path.

    Builds a 4x2 figure. For every value in ``steps`` the left column gets
    one curve per row: F1 score of the support, support size, distance to
    the true coefficients, and held-out MSE. The right column shows the
    same four quantities along a Lasso path computed with ``celer_path``.

    Parameters
    ----------
    corr, snr :
        Forwarded to ``make_correlated_data`` and shown in the figure title.
    density :
        Fraction of the ``n_features`` coefficients set to 1 in the initial
        ground-truth vector.
    steps : iterable
        Values passed one at a time as ``step`` to ``dual_primal``.
    max_iter, rho :
        Forwarded to ``dual_primal``.

    Returns
    -------
    None
        The figure is shown with ``plt.show(block=False)``.

    Notes
    -----
    ``n_features`` and ``n_samples`` are read from the enclosing scope.
    """
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    n_active = int(density * n_features)
    supp = np.random.choice(n_features, size=n_active, replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1

    # Generate 4/3 * n_samples rows; a quarter of them is held out below.
    X_, y_, w_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.),
        n_features=n_features,
        w_true=w_true,
        corr=corr,
        snr=snr,
        random_state=0,
    )
    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)
    print('Starting computation for this setting')

    fig, axarr = plt.subplots(
        4, 2, sharey='row', sharex='col', figsize=(7, 5),
        constrained_layout=True)
    fig.suptitle(r"Correlation=%.1f, $||w^*||_0$= %s, snr=%s" %
                 (corr, (w_true != 0).sum(), snr))

    # Column views: iterative regularization on the left, Lasso on the right.
    iter_axes = axarr[:, 0]
    lasso_axes = axarr[:, 1]

    for step in steps:
        _, _, _, all_w = dual_primal(
            X, y, step=step, rho=rho, ret_all=True, max_iter=max_iter,
            f_store=1)

        scores = [f1_score(w != 0, w_true != 0) for w in all_w]
        supp_size = np.sum(all_w != 0, axis=1)
        mses = [mean_squared_error(y_test, X_test @ w) for w in all_w]

        iter_axes[0].plot(scores, label=r"$\sigma=1 /%d ||X||$" % step)
        iter_axes[1].semilogy(supp_size)
        iter_axes[2].plot(norm(all_w - w_true, axis=1))
        iter_axes[3].plot(mses)

    iter_axes[0].set_ylim(0, 1)
    iter_axes[0].set_ylabel('F1 score for support')
    iter_axes[1].set_ylabel(r"$||w_k||_0$")
    iter_axes[2].set_ylabel(r'$\Vert w_k - w^*\Vert$')
    iter_axes[2].set_xlabel("CP iteration")
    iter_axes[3].set_ylabel("pred MSE left out")
    iter_axes[0].legend(loc='lower right', fontsize=10)
    iter_axes[0].set_title('Iterative regularization')

    # last column: Lasso results
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alphas = alpha_max * np.geomspace(1, 1e-3)
    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T

    lasso_f1 = [f1_score(coef != 0, w_true != 0) for coef in coefs]
    lasso_supp = [np.sum(coef != 0) for coef in coefs]
    lasso_dist = [norm(coef - w_true) for coef in coefs]
    lasso_mse = [mean_squared_error(y_test, X_test @ coef) for coef in coefs]

    lasso_axes[0].semilogx(alphas, lasso_f1)
    lasso_axes[1].semilogx(alphas, lasso_supp)
    lasso_axes[2].semilogx(alphas, lasso_dist)
    lasso_axes[3].semilogx(alphas, lasso_mse)

    lasso_axes[3].set_xlabel(r'$\lambda$')
    lasso_axes[0].set_title("Lasso path")

    # Swap the x-limits on the first three Lasso rows only.
    # NOTE(review): with sharex='col' each swap presumably propagates to the
    # whole column, so the odd count may be what leaves the axis reversed --
    # confirm before changing how many rows are flipped.
    for ax in lasso_axes[:3]:
        left, right = ax.get_xlim()
        ax.set_xlim(right, left)

    plt.show(block=False)
1, sharex=True, constrained_layout=True, figsize=(7.15, 3)) sigma_good = 1. / np.sort(np.abs(X.T @ y))[int(0.99 * n_features)] / 2 step_good = 1 / (sigma_good * norm(X, ord=2)) steps = [1, 100, step_good] labels = [r"$\sigma=\tau$", r"$\sigma \ll \tau$", "data-driven"] all_w = dict() for step, label in zip(steps, labels): all_w[step] = dual_primal(X, y, ret_all=True, max_iter=100, step=step, f_store=1)[-1] f1_scores = [f1_score(w != 0, w_true != 0) for w in all_w[step]] supp_size = np.sum(all_w[step] != 0, axis=1) axarr[0].plot(f1_scores, label=label) axarr[1].plot(supp_size) axarr[0].legend(ncol=3) axarr[0].set_ylim(0, 1) axarr[0].set_ylabel('F1 score for support') axarr[1].set_ylabel(r"$||w_k||_0$") axarr[1].set_xlabel(r'Chambolle Pock iteration') plt.show(block=False)