Beispiel #1
0
def test_FracRidgeRegressorCV(nn, pp, bb, fit_intercept, jit):
    """Check the cross-validated estimator against two direct refits.

    ``FracRidgeRegressorCV`` is fit over a grid of fractions. Its
    coefficients are then compared (``atol=10e-3``) with

    * a ``FracRidgeRegressor`` fit at the selected ``best_frac_``, and
    * a scikit-learn ``Ridge`` (SVD solver) fit at the selected ``alpha_``.
    """
    X, y, _, _ = make_data(nn, pp, bb, fit_intercept)
    frac_grid = np.arange(.1, 1.1, .1)

    cv_model = FracRidgeRegressorCV(fit_intercept=fit_intercept, jit=jit)
    cv_model.fit(X, y, frac_grid=frac_grid)

    # Plain fractional ridge at the fraction chosen by cross-validation.
    frac_model = FracRidgeRegressor(
        fracs=cv_model.best_frac_,
        fit_intercept=fit_intercept,
    )
    frac_model.fit(X, y)
    assert np.allclose(frac_model.coef_, cv_model.coef_, atol=10e-3)

    # Standard ridge at the alpha chosen by cross-validation.
    ridge_model = Ridge(
        alpha=cv_model.alpha_,
        fit_intercept=fit_intercept,
        solver='svd',
    )
    ridge_model.fit(X, y)
    # sklearn stores coefficients transposed relative to the fracridge
    # convention, hence the ``.T`` before comparing.
    assert np.allclose(ridge_model.coef_.T, cv_model.coef_, atol=10e-3)
# Simulated multi-target regression problem with a low effective rank.
n_targets = 15
n_features = 80
effective_rank = 20
X, y, coef_true = make_regression(
    n_samples=250,
    n_features=n_features,
    effective_rank=effective_rank,
    n_targets=n_targets,
    coef=True,
    noise=10,
)

##########################################################################
# Iterating over the number of PCA components, we generate design matrices
# that have more and more dimensions in them. As the number of data
# dimensions grows, the best fraction for FracRidge decreases.

# One shared definition of the component counts, used by both the loop and
# the x-axis of the plot below.
component_counts = range(2, X.shape[-1], 5)

best_fracs = []

for n_components in component_counts:
    pipeline = Pipeline(steps=[
        ('pca', PCA(n_components=n_components)),
        ('fracridgecv', FracRidgeRegressorCV()),
    ])
    pipeline.fit(X, y)
    best_fracs.append(pipeline['fracridgecv'].best_frac_)

fig, ax = plt.subplots()
ax.plot(component_counts, best_fracs, 'o-')
ax.set_ylim([0, 1])
ax.set_xlabel("Number of PCA components")
ax.set_ylabel("Best fraction")

plt.show()
Beispiel #3
0
# We will start with SRR. We use a dense grid of alphas with 20
# log-spaced values -- a common heuristic used to ensure a wide sampling
# of alpha values

n_alphas = 20
srr_alphas = np.logspace(-10, 10, n_alphas)
srr = RidgeCV(alphas=srr_alphas)
srr.fit(X_train, y_train)

##########################################################################
# We sample the same number of fractions for FRR, evenly distributed between
# 1/n_alphas and 1.
#

# ``np.linspace`` includes its stop value, so the stop must be exactly 1.
# A stop of ``1 + 1 / n_alphas`` would yield a grid that ends at 1.05 with a
# step of 1/19, instead of the intended 0.05, 0.10, ..., 1.0.
fracs = np.linspace(1 / n_alphas, 1, n_alphas)
frr = FracRidgeRegressorCV()
frr.fit(X_train, y_train, frac_grid=fracs)

##########################################################################
# Both models are fit and used to predict a left out set. Performance
# of the models is compared using the :func:`sklearn.metrics.r2_score`
# function (coefficient of determination).

pred_frr = frr.predict(X_test)
pred_srr = srr.predict(X_test)

frr_r2 = r2_score(y_test, pred_frr)
srr_r2 = r2_score(y_test, pred_srr)

print(frr_r2)
print(srr_r2)
def test_FracRidge_estimator():
    """Run scikit-learn's ``check_estimator`` on both FracRidge estimators."""
    # Instantiate each class right before its check so that construction and
    # checking happen in the same order as two back-to-back calls would.
    for estimator_cls in (FracRidgeRegressor, FracRidgeRegressorCV):
        check_estimator(estimator_cls())
# We will start with SRR. We use a dense grid of alphas with 20
# log-spaced values -- a common heuristic used to ensure a wide sampling
# of alpha values

n_alphas = 20
srr_alphas = np.logspace(-10, 10, n_alphas)
srr = RidgeCV(alphas=srr_alphas)
srr.fit(X_train, y_train)

##########################################################################
# We sample the same number of fractions for FRR, evenly distributed between
# 1/n_alphas and 1.
#

# ``np.linspace`` includes its stop value, so the stop must be exactly 1.
# A stop of ``1 + 1 / n_alphas`` would yield a grid that ends at 1.05 with a
# step of 1/19, instead of the intended 0.05, 0.10, ..., 1.0.
fracs = np.linspace(1 / n_alphas, 1, n_alphas)
frr = FracRidgeRegressorCV(frac_grid=fracs)
frr.fit(X_train, y_train)

##########################################################################
# Both models are fit and used to predict a left out set. Performance
# of the models is compared using the :func:`sklearn.metrics.r2_score`
# function (coefficient of determination).

pred_frr = frr.predict(X_test)
pred_srr = srr.predict(X_test)

frr_r2 = r2_score(y_test, pred_frr)
srr_r2 = r2_score(y_test, pred_srr)

print(frr_r2)
print(srr_r2)
Beispiel #6
0
import numpy as np
from fracridge import (fracridge, vec_len, FracRidgeRegressor,
                       FracRidgeRegressorCV)
from sklearn.linear_model import Ridge
import pytest
from sklearn.utils.estimator_checks import parametrize_with_checks


@parametrize_with_checks([FracRidgeRegressor(), FracRidgeRegressorCV()])
def test_sklearn_compatible_estimator(estimator, check):
    """Run a single scikit-learn compatibility check on a single estimator.

    ``parametrize_with_checks`` supplies the ``(estimator, check)`` pairs for
    the ``FracRidgeRegressor`` and ``FracRidgeRegressorCV`` instances listed
    in the decorator; each pair becomes its own test case.
    """
    check(estimator)


def run_fracridge(X, y, fracs, jit):
    """Call ``fracridge`` on ``(X, y)`` with the given ``fracs`` and ``jit``.

    The result of ``fracridge`` is discarded and ``None`` is returned; this
    wrapper is the callable handed to the ``benchmark`` fixture in
    ``test_benchmark_fracridge``.
    """
    fracridge(X, y, fracs=fracs, jit=jit)


@pytest.mark.parametrize("nn, pp", [(1000, 10), (10, 100), (284, 50)])
@pytest.mark.parametrize("bb", [1, 2, 1000])
@pytest.mark.parametrize("jit", [True, False])
def test_benchmark_fracridge(nn, pp, bb, jit, benchmark):
    """Benchmark ``fracridge`` over a grid of problem sizes.

    Data are produced by ``make_data(nn, pp, bb)``, and ``run_fracridge`` is
    timed through the ``benchmark`` fixture for ten fractions (0.1 to 1.0),
    with and without ``jit``.
    """
    design, targets, _, _ = make_data(nn, pp, bb)
    frac_grid = np.arange(0.1, 1.1, 0.1)
    benchmark(run_fracridge, design, targets, frac_grid, jit)


def make_data(nn, pp, bb, fit_intercept=False):
    np.random.seed(1)
    X = np.random.randn(nn, pp)
    y = np.random.randn(nn, bb).squeeze()
    if fit_intercept: