Python MapieRegressor 예제들, mapie.estimators.MapieRegressor Python 예제들

예제 #1

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_prediction_between_low_up(return_pred: str) -> None:
    """
    Test that the point prediction lies inside the prediction interval.

    The array returned by ``predict`` is read column-wise: column 0 is the
    point prediction, column 1 the lower bound and column 2 the upper bound.

    Parameters
    ----------
    return_pred : str
        Value forwarded to the ``return_pred`` argument of MapieRegressor.
    """
    mapie = MapieRegressor(LinearRegression(), return_pred=return_pred)
    mapie.fit(X_boston, y_boston)
    y_preds = mapie.predict(X_boston)
    y_pred, y_low, y_up = y_preds[:, 0], y_preds[:, 1], y_preds[:, 2]
    # One assertion per bound: a failure then says which side was violated,
    # and no bitwise operator is needed to combine the two checks.
    assert (y_pred >= y_low).all()
    assert (y_pred <= y_up).all()

예제 #2

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_results(method: str) -> None:
    """
    Check that a linear model fitted on a linear curve has null uncertainty.

    With MapieRegressor wrapping a LinearRegression on the toy data, the
    upper and lower interval bounds must agree to 10 decimals.
    """
    regressor = MapieRegressor(LinearRegression(), method=method, n_splits=3)
    regressor.fit(X_toy, y_toy)
    intervals = regressor.predict(X_toy)
    lower_bound = intervals[:, 1]
    upper_bound = intervals[:, 2]
    assert_almost_equal(upper_bound, lower_bound, 10)

예제 #3

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_linreg_results(method: str) -> None:
    """Check mean PI width and coverage against the expected values for a seeded linear regression."""
    regressor = MapieRegressor(
        LinearRegression(), method=method, alpha=0.05, random_state=SEED
    )
    regressor.fit(X_reg, y_reg)
    intervals = regressor.predict(X_reg)
    lower, upper = intervals[:, 1], intervals[:, 2]

    # Mean interval width, compared to 2 decimals.
    mean_width = (upper - lower).mean()
    assert_almost_equal(mean_width, expected_widths[method], 2)

    # Coverage of the targets by the intervals, compared to 2 decimals.
    coverage = coverage_score(y_reg, lower, upper)
    assert_almost_equal(coverage, expected_coverages[method], 2)

예제 #4

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_optional_input_values() -> None:
    """Check the default value of every optional constructor parameter."""
    defaults = MapieRegressor(DummyRegressor())
    # Parameters whose default is compared by equality, in declaration order.
    for name, expected in (("method", "cv_plus"), ("alpha", 0.1), ("n_splits", 5)):
        assert getattr(defaults, name) == expected
    assert defaults.shuffle
    assert defaults.return_pred == "single"
    assert defaults.random_state is None

예제 #5

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_invalid_method_in_predict(monkeypatch: Any, method: str) -> None:
    """
    Check that predict raises "Invalid method" for an invalid method.

    ``_check_parameters`` is replaced by a no-op and ``_select_cv`` by a stub
    returning ``LeaveOneOut()``; fit is called outside the ``raises`` block,
    so only predict is expected to reject the method.
    """
    def _skip_parameter_checks(_self: Any) -> None:
        return None

    def _leave_one_out_cv(_self: Any) -> Any:
        return LeaveOneOut()

    monkeypatch.setattr(MapieRegressor, "_check_parameters", _skip_parameter_checks)
    monkeypatch.setattr(MapieRegressor, "_select_cv", _leave_one_out_cv)

    regressor = MapieRegressor(DummyRegressor(), method=method)
    regressor.fit(X_boston, y_boston)
    with pytest.raises(ValueError, match=r".*Invalid method.*"):
        regressor.predict(X_boston)

예제 #6

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_predinterv_outputshape() -> None:
    """
    Test the shape of the array returned by predict.

    The output must contain one row per input observation and exactly
    three columns.
    """
    mapie = MapieRegressor(DummyRegressor())
    mapie.fit(X_reg, y_reg)
    # Predict once and reuse the result for both shape checks instead of
    # recomputing the predictions for every assertion.
    y_preds = mapie.predict(X_reg)
    assert y_preds.shape[0] == X_reg.shape[0]
    assert y_preds.shape[1] == 3

예제 #7

0

파일 보기

# Non-nested approach with the CV+ method using the Random Forest model.
# Step 1: randomized hyper-parameter search, fitted on the training set.
cv_obj = RandomizedSearchCV(rf_model,
                            param_distributions=rf_params,
                            n_iter=n_iter,
                            cv=n_cv,
                            scoring="neg_root_mean_squared_error",
                            return_train_score=True,
                            verbose=0,
                            n_jobs=-1,
                            random_state=random_state)
cv_obj.fit(X_train, y_train)
# Step 2: wrap the best estimator found by the search in MapieRegressor and
# fit it on the same training data with the same number of splits.
best_est = cv_obj.best_estimator_
mapie_non_nested = MapieRegressor(best_est,
                                  alpha=alpha,
                                  method='cv_plus',
                                  n_splits=n_cv,
                                  return_pred='median',
                                  random_state=random_state)
mapie_non_nested.fit(X_train, y_train)
# Step 3: evaluate on the test set. Column 0 is used as the point prediction,
# columns 1 and 2 as the lower and upper interval bounds.
y_preds_non_nested = mapie_non_nested.predict(X_test)
widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
coverage_non_nested = coverage_score(y_test, y_preds_non_nested[:, 1],
                                     y_preds_non_nested[:, 2])
# NOTE(review): squared=False presumably yields the RMSE (scikit-learn's
# mean_squared_error) - confirm against the file's imports.
score_non_nested = mean_squared_error(y_test,
                                      y_preds_non_nested[:, 0],
                                      squared=False)

# Nested approach with the CV+ method using the Random Forest model.
cv_obj = RandomizedSearchCV(rf_model,
                            param_distributions=rf_params,
                            n_iter=n_iter,

예제 #8

0

파일 보기

파일: plot_toy_model.py 프로젝트: vtaquet/MAPIE

======================================================

An example plot of :class:`mapie.estimators.MapieRegressor` used
in the Quickstart.
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score

# Build a one-feature noisy regression problem with a fixed seed.
regressor = LinearRegression()
X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)

# Fit MAPIE with the jackknife+ method and predict on the training inputs.
mapie = MapieRegressor(regressor, method="jackknife_plus")
mapie.fit(X, y)
y_preds = mapie.predict(X)

# Scatter the data and draw column 0 of the predictions as the fitted line.
plt.xlabel('x')
plt.ylabel('y')
plt.scatter(X, y, alpha=0.3)
plt.plot(X, y_preds[:, 0], color='C1')
# Sort by the single feature so the band between columns 1 (lower) and
# 2 (upper) is drawn from left to right.
order = np.argsort(X[:, 0])
plt.fill_between(X[order].ravel(),
                 y_preds[:, 1][order],
                 y_preds[:, 2][order],
                 alpha=0.3)
# NOTE(review): the 0.9 target coverage is hard-coded in the title; it
# presumably mirrors MapieRegressor's default alpha - confirm.
plt.title(
    f"Target coverage = 0.9; Effective coverage = {coverage_score(y, y_preds[:, 1], y_preds[:, 2])}"
)

예제 #9

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_none_estimator() -> None:
    """Check that fitting with a None estimator raises "Invalid none estimator"."""
    regressor = MapieRegressor(None)
    expected_message = r".*Invalid none estimator.*"
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X_boston, y_boston)

예제 #10

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_fitted() -> None:
    """Check that fitting a MapieRegressor around a DummyRegressor completes without error."""
    MapieRegressor(DummyRegressor()).fit(X_reg, y_reg)

예제 #11

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_not_fitted() -> None:
    """Check that calling predict before fit raises NotFittedError mentioning "not fitted"."""
    unfitted = MapieRegressor(DummyRegressor())
    expected_message = r".*not fitted.*"
    with pytest.raises(NotFittedError, match=expected_message):
        unfitted.predict(X_reg)

예제 #12

0

파일 보기

파일: plot_homoscedastic_1d_data.py 프로젝트: remiadon/MAPIE

    ax.plot(X_test, y_pred, label='Prediction intervals')
    ax.fill_between(X_test, y_pred_low, y_pred_up, alpha=0.3)
    ax.set_title(title)
    ax.legend()


# Generate the train/test data (200 samples each, sigma=0.1) with the
# get_homoscedastic_data helper.
X_train, y_train, X_test, y_test, y_test_sigma = get_homoscedastic_data(
    n_samples=200, n_test=200, sigma=0.1)

# Base model: degree-4 polynomial features followed by a linear regression
# fitted without its own intercept.
polyn_model = Pipeline([('poly', PolynomialFeatures(degree=4)),
                        ('linear', LinearRegression(fit_intercept=False))])

# One subplot per method, laid out on a 2 x 3 grid.
methods = [
    'jackknife', 'jackknife_plus', 'jackknife_minmax', 'cv', 'cv_plus',
    'cv_minmax'
]
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2,
                                                       3,
                                                       figsize=(3 * 6, 12))
axs = [ax1, ax2, ax3, ax4, ax5, ax6]
for i, method in enumerate(methods):
    mapie = MapieRegressor(polyn_model,
                           method=method,
                           alpha=0.05,
                           n_splits=10,
                           return_pred='ensemble')
    # reshape(-1, 1) feeds the inputs as a single column; X_train and X_test
    # are presumably 1-D arrays - confirm against get_homoscedastic_data.
    mapie.fit(X_train.reshape(-1, 1), y_train)
    y_preds = mapie.predict(X_test.reshape(-1, 1))
    # Columns 0, 1 and 2 of y_preds are passed to plot_1d_data in that order,
    # followed by the target axis and the method name.
    plot_1d_data(X_train, y_train, X_test, y_test, y_test_sigma, y_preds[:, 0],
                 y_preds[:, 1], y_preds[:, 2], axs[i], method)

예제 #13

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_invalid_alpha(alpha: int) -> None:
    """Check that fitting with an invalid alpha raises "Invalid alpha"."""
    regressor = MapieRegressor(DummyRegressor(), alpha=alpha)
    expected_message = r".*Invalid alpha.*"
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X_boston, y_boston)

예제 #14

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_invalid_method_in_fit(monkeypatch: Any, method: str) -> None:
    """
    Check that fit raises "Invalid method" when parameter checks are bypassed.

    ``_check_parameters`` is replaced by a no-op, so the error has to be
    raised later in fit.
    """
    def _skip_parameter_checks(_self: Any) -> None:
        return None

    monkeypatch.setattr(MapieRegressor, "_check_parameters", _skip_parameter_checks)
    regressor = MapieRegressor(DummyRegressor(), method=method)
    with pytest.raises(ValueError, match=r".*Invalid method.*"):
        regressor.fit(X_boston, y_boston)

예제 #15

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_invalid_method_in_check_parameters(method: str) -> None:
    """Check that fitting with an invalid method raises "Invalid method"."""
    regressor = MapieRegressor(DummyRegressor(), method=method)
    expected_message = r".*Invalid method.*"
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X_boston, y_boston)

예제 #16

0

파일 보기

파일: test_estimators.py 프로젝트: vtaquet/MAPIE

def test_none_estimator() -> None:
    """
    Test that a None estimator is replaced by a LinearRegression.

    No error is expected here: after fitting a MapieRegressor built with
    ``None``, its ``estimator`` attribute must be a LinearRegression instance.
    """
    mapie = MapieRegressor(None)
    mapie.fit(X_boston, y_boston)
    assert isinstance(mapie.estimator, LinearRegression)

예제 #17

0

파일 보기

파일: test_estimators.py 프로젝트: vtaquet/MAPIE

def test_invalid_ensemble_in_check_parameters(ensemble: Any) -> None:
    """Check that fitting with an invalid ensemble value raises "Invalid ensemble"."""
    regressor = MapieRegressor(DummyRegressor(), ensemble=ensemble)
    expected_message = r".*Invalid ensemble.*"
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X_boston, y_boston)

예제 #18

0

파일 보기

파일: test_estimators.py 프로젝트: vtaquet/MAPIE

def test_prediction_between_low_up(ensemble: bool) -> None:
    """
    Test that the point prediction lies inside the prediction interval.

    The array returned by ``predict`` is read column-wise: column 0 is the
    point prediction, column 1 the lower bound and column 2 the upper bound.

    Parameters
    ----------
    ensemble : bool
        Value forwarded to the ``ensemble`` argument of MapieRegressor.
    """
    mapie = MapieRegressor(LinearRegression(), ensemble=ensemble)
    mapie.fit(X_boston, y_boston)
    y_preds = mapie.predict(X_boston)
    y_pred, y_low, y_up = y_preds[:, 0], y_preds[:, 1], y_preds[:, 2]
    # One assertion per bound: a failure then says which side was violated,
    # and no bitwise operator is needed to combine the two checks.
    assert (y_pred >= y_low).all()
    assert (y_pred <= y_up).all()


@pytest.mark.parametrize("method", all_methods)
def test_linreg_results(method: str) -> None:
    """Check mean PI width and coverage against the expected values for a seeded linear regression."""
    regressor = MapieRegressor(
        LinearRegression(), method=method, alpha=0.05, random_state=SEED
    )
    regressor.fit(X_reg, y_reg)
    intervals = regressor.predict(X_reg)
    lower, upper = intervals[:, 1], intervals[:, 2]

    # Mean interval width, compared to 2 decimals.
    mean_width = (upper - lower).mean()
    assert_almost_equal(mean_width, expected_widths[method], 2)

    # Coverage of the targets by the intervals, compared to 2 decimals.
    coverage = coverage_score(y_reg, lower, upper)
    assert_almost_equal(coverage, expected_coverages[method], 2)


@parametrize_with_checks([MapieRegressor(LinearRegression())])  # type: ignore
def test_sklearn_compatible_estimator(estimator: Any, check: Any) -> None:
    """Run each sklearn estimator check, except those listed in SKLEARN_EXCLUDED_CHECKS."""
    check_name = check.func.__name__
    if check_name in SKLEARN_EXCLUDED_CHECKS:
        # Explicitly excluded checks are skipped without running.
        return
    check(estimator)

예제 #19

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_single_estimator_attribute(method: str) -> None:
    """Check that a fitted MapieRegressor exposes ``single_estimator_`` for the given method."""
    fitted = MapieRegressor(DummyRegressor(), method=method)
    fitted.fit(X_reg, y_reg)
    attribute_name = 'single_estimator_'
    assert hasattr(fitted, attribute_name)

예제 #20

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_quantile_attribute(method: str) -> None:
    """Check that a fitted MapieRegressor exposes a non-negative ``quantile_``."""
    fitted = MapieRegressor(DummyRegressor(), method=method)
    fitted.fit(X_reg, y_reg)
    assert hasattr(fitted, 'quantile_')
    quantile_is_non_negative = fitted.quantile_ >= 0
    assert quantile_is_non_negative

예제 #21

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_initialized() -> None:
    """Check that constructing a MapieRegressor around a DummyRegressor does not raise."""
    base_model = DummyRegressor()
    MapieRegressor(base_model)

예제 #22

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_jkcv_attribute(method: str) -> None:
    """Check the fitted attributes shared by the jackknife and CV methods."""
    fitted = MapieRegressor(DummyRegressor(), method=method)
    fitted.fit(X_reg, y_reg)
    for attribute_name in ('estimators_', 'residuals_'):
        assert hasattr(fitted, attribute_name)

예제 #23

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_predicted() -> None:
    """Check that predict runs without error on a fitted MapieRegressor."""
    regressor = MapieRegressor(DummyRegressor())
    regressor.fit(X_reg, y_reg)
    # Only the absence of an exception matters; the output is discarded.
    regressor.predict(X_reg)

예제 #24

0

파일 보기

파일: test_estimators.py 프로젝트: remiadon/MAPIE

def test_cv_attributes(method: str) -> None:
    """
    Check the attributes shared by the CV methods.

    After fitting with ``shuffle=False``, the model must expose ``k_`` and
    its ``random_state`` must still be None.
    """
    cv_model = MapieRegressor(DummyRegressor(), method=method, shuffle=False)
    cv_model.fit(X_reg, y_reg)
    assert hasattr(cv_model, 'k_')
    assert cv_model.random_state is None

예제 #25

0

파일 보기

파일: plot_barber2020_simulations.py 프로젝트: remiadon/MAPIE

def PIs_vs_dimensions(
    methods: List[str],
    alpha: float,
    n_trial: int,
    dimensions: List[int]
) -> Dict[str, Dict[int, Dict[str, np.ndarray]]]:
    """
    Simulate prediction intervals of a linear model across input dimensions.

    Function adapted from Foygel-Barber et al. (2020).

    For every dimension in ``dimensions`` and every trial, linear data are
    drawn with standard-normal features and noise and a random coefficient
    vector rescaled so that the signal-to-noise ratio equals 10. MAPIE, with
    a LinearRegression base model, is then fitted with each method, and the
    coverage and mean width of its prediction intervals on the test set are
    recorded.

    The simulation highlights the instability of the intervals estimated by
    the Jackknife method when the dimension equals the number of training
    samples (here 100).

    Parameters
    ----------
    methods : List[str]
        List of methods for estimating prediction intervals.
    alpha : float
        1 - (target coverage level).
    n_trial : int
        Number of trials per dimension; new random data are drawn at each
        trial.
    dimensions : List[int]
        List of dimension values of input data.

    Returns
    -------
    Dict[str, Dict[int, Dict[str, np.ndarray]]]
        ``results[method][dimension]`` holds two arrays of length
        ``n_trial``, ``"coverage"`` and ``"width_mean"``, indexed by trial.
    """
    n_train, n_test = 100, 100
    SNR = 10

    # Pre-allocate one coverage array and one mean-width array for each
    # (method, dimension) pair.
    results: Dict[str, Dict[int, Dict[str, np.ndarray]]] = {}
    for name in methods:
        results[name] = {}
        for dim in dimensions:
            results[name][dim] = {
                "coverage": np.empty(n_trial),
                "width_mean": np.empty(n_trial),
            }

    for dimension in dimensions:
        for trial in range(n_trial):
            # Random coefficients, rescaled to Euclidean norm sqrt(SNR).
            coefs = np.random.normal(size=dimension)
            coefs = coefs / np.sqrt((coefs**2).sum()) * np.sqrt(SNR)

            # The order of the random draws below is kept as is.
            X_train = np.random.normal(size=(n_train, dimension))
            eps_train = np.random.normal(size=n_train)
            eps_test = np.random.normal(size=n_test)
            X_test = np.random.normal(size=(n_test, dimension))

            y_train = X_train.dot(coefs) + eps_train
            y_test = X_test.dot(coefs) + eps_test

            for method in methods:
                mapie = MapieRegressor(
                    LinearRegression(),
                    alpha=alpha,
                    method=method,
                    n_splits=5,
                    shuffle=False,
                    return_pred="ensemble"
                )
                mapie.fit(X_train, y_train)
                y_preds = mapie.predict(X_test)
                # Columns 1 and 2 hold the lower and upper interval bounds.
                lower, upper = y_preds[:, 1], y_preds[:, 2]
                scores = results[method][dimension]
                scores["coverage"][trial] = coverage_score(y_test, lower, upper)
                scores["width_mean"][trial] = (upper - lower).mean()
    return results

예제 #26

0

파일 보기

파일: plot_nested-cv.py 프로젝트: vtaquet/MAPIE

# Non-nested approach with the CV+ method using the Random Forest model.
# Step 1: randomized hyper-parameter search, fitted on the training set.
cv_obj = RandomizedSearchCV(rf_model,
                            param_distributions=rf_params,
                            n_iter=n_iter,
                            cv=n_cv,
                            scoring="neg_root_mean_squared_error",
                            return_train_score=True,
                            verbose=0,
                            n_jobs=-1,
                            random_state=random_state)
cv_obj.fit(X_train, y_train)
# Step 2: wrap the best estimator found by the search in MapieRegressor and
# fit it on the same training data with the same number of splits.
best_est = cv_obj.best_estimator_
mapie_non_nested = MapieRegressor(best_est,
                                  alpha=alpha,
                                  method="cv_plus",
                                  n_splits=n_cv,
                                  ensemble=True,
                                  random_state=random_state)
mapie_non_nested.fit(X_train, y_train)
# Step 3: evaluate on the test set. Column 0 is used as the point prediction,
# columns 1 and 2 as the lower and upper interval bounds.
y_preds_non_nested = mapie_non_nested.predict(X_test)
widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
coverage_non_nested = coverage_score(y_test, y_preds_non_nested[:, 1],
                                     y_preds_non_nested[:, 2])
# NOTE(review): squared=False presumably yields the RMSE (scikit-learn's
# mean_squared_error) - confirm against the file's imports.
score_non_nested = mean_squared_error(y_test,
                                      y_preds_non_nested[:, 0],
                                      squared=False)

# Nested approach with the CV+ method using the Random Forest model.
cv_obj = RandomizedSearchCV(rf_model,
                            param_distributions=rf_params,
                            n_iter=n_iter,