Example #1
# Assumed imports (celer test-suite context; `build_dataset` is celer's
# test helper, whose exact module path may vary across versions):
import numpy as np
from numpy.linalg import norm

from celer import GroupLasso, Lasso
from celer.utils.testing import build_dataset


def test_group_lasso_lasso(sparse_X, fit_intercept, normalize):
    # check that group Lasso with groups of size 1 gives Lasso
    n_features = 1000
    X, y = build_dataset(n_samples=100,
                         n_features=n_features,
                         sparse_X=sparse_X)[:2]
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alpha = alpha_max / 10
    clf = Lasso(alpha,
                tol=1e-12,
                fit_intercept=fit_intercept,
                normalize=normalize,
                verbose=0)
    clf.fit(X, y)
    # take groups of size 1:

    clf1 = GroupLasso(alpha=alpha,
                      groups=1,
                      tol=1e-12,
                      fit_intercept=fit_intercept,
                      normalize=normalize,
                      verbose=0)
    clf1.fit(X, y)

    np.testing.assert_allclose(clf1.coef_, clf.coef_, atol=1e-6)
    np.testing.assert_allclose(clf1.intercept_, clf.intercept_, rtol=1e-4)
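The test above takes sparse_X, fit_intercept and normalize as arguments, which suggests it is driven by pytest parametrization. A minimal sketch of how it might be wired up (the decorator values are an assumption, not celer's actual test configuration):

import itertools

import pytest


@pytest.mark.parametrize(
    "sparse_X, fit_intercept, normalize",
    list(itertools.product([True, False], repeat=3)))
def test_group_lasso_lasso(sparse_X, fit_intercept, normalize):
    ...  # body as in Example #1 above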
Example #2
# Assumed imports (benchopt solver module):
import warnings

from benchopt import BaseSolver
from celer import Lasso
from sklearn.exceptions import ConvergenceWarning


class Solver(BaseSolver):
    name = 'Celer'
    stop_strategy = 'iteration'

    install_cmd = 'conda'
    requirements = ['pip:git+https://github.com/mathurinm/celer.git']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        n_samples = self.X.shape[0]
        self.lasso = Lasso(
            alpha=self.lmbd / n_samples,
            max_iter=1,
            max_epochs=100000,
            tol=1e-12,
            prune=True,
            fit_intercept=False,
            normalize=False,
            warm_start=False,
            positive=False,
            verbose=False,
        )

    def run(self, n_iter):
        self.lasso.max_iter = n_iter
        self.lasso.fit(self.X, self.y)

    def get_result(self):
        return self.lasso.coef_.flatten()
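Benchopt calls these hooks in a fixed order: set_objective once, then run with a growing iteration budget, reading the iterate back with get_result each time. A minimal driver sketch on synthetic data (an illustration of the protocol, not benchopt's actual runner):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 50)
y = rng.randn(100)

solver = Solver()
solver.set_objective(X, y, lmbd=0.1 * np.abs(X.T @ y).max())
for n_iter in (1, 10, 100):  # benchopt grows the budget and records the curve
    solver.run(n_iter)
    coef = solver.get_result()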
Example #3
    # Method of an atom-based reconstruction class (assumed context):
    # `self.atoms`, `self.compute_obs` and `WeightedIndicatorFunction` come
    # from the surrounding module. Refits the atom weights with celer's
    # weighted Lasso, using each atom's perimeter as its penalty weight.
    def fit_weights(self, y, phi, reg_param, tol_factor=1e-4):
        obs = self.compute_obs(phi, version=1)
        mat = np.array([np.sum(obs[i], axis=0) for i in range(self.num_atoms)])
        mat = mat.reshape((self.num_atoms, -1)).T

        tol = tol_factor * np.linalg.norm(y)**2 / y.size
        perimeters = np.array([self.atoms[i].support.compute_perimeter() for i in range(self.num_atoms)])

        lasso = Lasso(alpha=reg_param/y.size, fit_intercept=False, tol=tol, weights=perimeters)
        lasso.fit(mat, y.reshape(-1))

        new_weights = lasso.coef_
        # keep only atoms whose refitted weight is significantly nonzero
        self.atoms = [WeightedIndicatorFunction(new_weights[i], self.atoms[i].support)
                      for i in range(self.num_atoms) if np.abs(new_weights[i]) > 1e-2]
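Example #3 relies on celer's per-feature weights argument, which scales the l1 penalty feature by feature. A self-contained sketch of that weighted-Lasso objective on made-up data:

import numpy as np
from celer import Lasso

rng = np.random.default_rng(0)
X = rng.standard_normal((60, 8))
y = X[:, :3].sum(axis=1) + 0.01 * rng.standard_normal(60)
weights = np.ones(8)  # larger weight = stronger penalty on that feature
clf = Lasso(alpha=0.05, weights=weights, fit_intercept=False, tol=1e-10)
clf.fit(X, y)
print(clf.coef_)  # min ||y - Xw||^2 / (2n) + alpha * sum_j weights_j * |w_j|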
Example #4
# Assumed imports (benchopt solver module):
import warnings

import numpy as np
from benchopt import BaseSolver
from celer import Lasso
from sklearn.exceptions import ConvergenceWarning


class Solver(BaseSolver):
    name = 'Celer'
    stop_strategy = 'iteration'

    install_cmd = 'conda'
    requirements = ['pip:celer']
    references = [
        'M. Massias, A. Gramfort and J. Salmon, ICML, '
        '"Celer: a Fast Solver for the Lasso with Dual Extrapolation", '
        'vol. 80, pp. 3321-3330 (2018)'
    ]

    def set_objective(self, X, y, lmbd, fit_intercept):
        self.X, self.y, self.lmbd = X, y, lmbd
        self.fit_intercept = fit_intercept

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        n_samples = self.X.shape[0]
        self.lasso = Lasso(
            alpha=self.lmbd / n_samples,
            max_iter=1,
            max_epochs=100000,
            tol=1e-12,
            prune=True,
            fit_intercept=fit_intercept,
            warm_start=False,
            positive=False,
            verbose=False,
        )

    def run(self, n_iter):
        self.lasso.max_iter = n_iter
        self.lasso.fit(self.X, self.y)

    def get_result(self):
        beta = self.lasso.coef_.flatten()
        if self.fit_intercept:
            beta = np.r_[beta, self.lasso.intercept_]
        return beta
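When fit_intercept is True, get_result appends the intercept as the last entry of the returned vector, so a consumer must split it back out. A small illustrative helper (the function name is mine, not benchopt's):

import numpy as np

def predict_from_result(X, beta, fit_intercept):
    # beta = [coefficients, intercept] when fit_intercept is True
    if fit_intercept:
        return X @ beta[:-1] + beta[-1]
    return X @ beta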
Example #5
# Assumed imports and context (sparse-ho experiment script): the sparse-ho
# models `Lasso`/`SparseLogreg`, celer's Lasso (aliased `Lasso_cel`),
# `load_libsvm`, `get_beta_jac_iterdiff`, the `blitzl1` bindings and the
# module-level dicts `p_alphas`/`max_iters` are imported or defined elsewhere.
import numpy as np
from numpy.linalg import norm
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import cg


def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())

    alpha = p_alpha * alpha_max
    if model_name == "lasso":
        clf = Lasso_cel(alpha=alpha,
                        fit_intercept=False,
                        warm_start=True,
                        tol=tol * norm(y)**2 / 2,
                        max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True).fit(X, y)
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True,
        #     solver='liblinear').fit(X, y)
        # beta_star = clf.coef_[0]

        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        # blitzl1.set_min_time(60)
        prob = blitzl1.LogRegProblem(X, y)
        # # lammax = prob.compute_lambda_max()
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]
    # if model == "lasso":
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    # mat_to_inv = X[:, mask].T  @ X[:, mask]

    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]
    # elif model == "logreg":
    #     v = - n_samples * alpha * np.sign(beta_star[mask])

    log_alpha = np.log(alpha)

    list_beta, list_jac = get_beta_jac_iterdiff(X,
                                                y,
                                                log_alpha,
                                                model,
                                                save_iterates=True,
                                                tol=tol,
                                                max_iter=max_iter,
                                                compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    # index of the first iterate from which the support matches the optimal
    # support (0 if every iterate already matches)
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break
        supp_id = 0

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
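The key step in Example #5 is the closed-form Jacobian: for the Lasso, on the recovered support S the optimality condition X_S^T (X_S beta - y) + n * alpha * sign(beta_S) = 0 holds, so differentiating w.r.t. log(alpha) gives jac_S = -(X_S^T X_S)^{-1} n alpha sign(beta_S), the linear system solved with cg above. A self-contained sketch with a dense design (the helper is mine; sparse-ho computes the Hessian via model.get_hessian):

import numpy as np
from scipy.sparse.linalg import cg

def lasso_jac_log_alpha(X, beta, alpha):
    """Jacobian of the Lasso solution w.r.t. log(alpha), via implicit diff."""
    n_samples = X.shape[0]
    mask = beta != 0
    v = -n_samples * alpha * np.sign(beta[mask])
    hessian = X[:, mask].T @ X[:, mask]  # Hessian restricted to the support
    jac = np.zeros_like(beta)
    jac[mask], _ = cg(hessian, v)
    return jac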
Example #6
# Assumed imports and context (sparse-ho example script; module paths may
# differ across sparse-ho versions): X, y, X_test, y_test, w_true, idx_train,
# idx_val, cv, n_features and the fitted LassoCV `model_cv` are defined above.
import numpy as np
from celer import Lasso
from sklearn.metrics import mean_squared_error
from sparse_ho import ImplicitForward, grad_search
from sparse_ho.criterion import CrossVal, HeldOutMSE
from sparse_ho.models import WeightedLasso
from sparse_ho.optimizers import GradientDescent
from sparse_ho.utils import Monitor

alpha0 = model_cv.alpha_ * np.ones(n_features)
# weighted Lasso with sparse-ho: one hyperparameter per feature
estimator = Lasso(fit_intercept=False, max_iter=100, warm_start=True)
model = WeightedLasso(estimator=estimator)
sub_criterion = HeldOutMSE(idx_train, idx_val)
criterion = CrossVal(sub_criterion, cv=cv)
algo = ImplicitForward()
monitor = Monitor()
optimizer = GradientDescent(
    n_outer=100, tol=1e-7, verbose=True, p_grad_norm=1.9)
results = grad_search(
    algo, criterion, model, optimizer, X, y, alpha0, monitor)
##############################################################################

estimator.weights = monitor.alphas[-1]
estimator.fit(X, y)
##############################################################################
# MSE on the data used for cross-validation
mse_sho_val = mean_squared_error(y, estimator.predict(X))

# MSE on test set, i.e. unseen data
mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test))

# Oracle MSE
mse_oracle = mean_squared_error(y_test, X_test @ w_true)

print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val)
print("Sparse-ho: Mean-squared error on test (unseen) data %f" % mse_sho_test)


labels = ['WeightedLasso val', 'WeightedLasso test', 'Lasso CV', 'Oracle']