コード例 #1
0
def test_load_diabetes():
    """Check the shape and metadata of the bunch returned by load_diabetes.

    Also verifies, through ``check_return_X_y``, that the ``return_X_y``
    option is consistent with the bunch.
    """
    res = load_diabetes()
    assert res.data.shape == (442, 10)
    # Compare explicitly: ``assert res.target.size, 442`` would only test the
    # truthiness of ``size`` and treat 442 as the failure message.
    assert res.target.size == 442
    assert len(res.feature_names) == 10
    assert res.DESCR

    # test return_X_y option
    check_return_X_y(res, partial(load_diabetes))
コード例 #2
0
def test_regression_scorers():
    """The 'r2' scorer must agree with r2_score applied to predictions."""
    bunch = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        bunch.data, bunch.target, random_state=0)

    model = Ridge()
    model.fit(X_train, y_train)

    # Same quantity computed two ways: through the scorer object and
    # directly from the metric function.
    r2_scorer = get_scorer('r2')
    via_scorer = r2_scorer(model, X_test, y_test)
    via_metric = r2_score(y_test, model.predict(X_test))
    assert_almost_equal(via_scorer, via_metric)
コード例 #3
0
def test_lasso_cv_with_some_model_selection():
    """Smoke test: LassoCV with a splitter object fits inside a pipeline."""
    from mrex.pipeline import make_pipeline
    from mrex.preprocessing import StandardScaler
    from mrex.model_selection import StratifiedKFold
    from mrex import datasets
    from mrex.linear_model import LassoCV

    bunch = datasets.load_diabetes()

    splitter = StratifiedKFold()
    model = make_pipeline(StandardScaler(), LassoCV(cv=splitter))
    # No assertion on the result: the test passes if fitting does not raise.
    model.fit(bunch.data, bunch.target)
コード例 #4
0
def test_linearsvr():
    """LinearSVR and SVR(kernel='linear') should give comparable results."""
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target

    linear_reg = svm.LinearSVR(C=1e3).fit(features, target)
    linear_score = linear_reg.score(features, target)

    kernel_reg = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_score = kernel_reg.score(features, target)

    # Coefficient norms are compared with the tolerances passed positionally
    # (1 and 0.0001); scores must agree to 2 decimal places.
    linear_norm = np.linalg.norm(linear_reg.coef_)
    kernel_norm = np.linalg.norm(kernel_reg.coef_)
    assert_allclose(linear_norm, kernel_norm, 1, 0.0001)
    assert_almost_equal(linear_score, kernel_score, 2)
コード例 #5
0
def test_svr():
    """Fit several support vector regressors and check a minimal score."""
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target

    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(features, target)
        assert regressor.score(features, target) > 0.02

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    n_rows = len(features)
    svm.SVR().fit(features, np.ones(n_rows))
    svm.LinearSVR().fit(features, np.ones(n_rows))
コード例 #6
0
def test_linearsvr_fit_sampleweight():
    """Check LinearSVR's handling of ``sample_weight``.

    Two properties are exercised:

    * fitting with all weights equal to 1 is comparable to fitting with
      no weights at all;
    * fitting with integer weights is comparable to fitting on data in
      which every row is repeated as many times as its weight.
    """
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target
    n_samples = len(target)

    # --- unit weights versus no weights -------------------------------
    ones = np.ones(n_samples)
    unit_fit = svm.LinearSVR(C=1e3).fit(features, target,
                                        sample_weight=ones)
    unit_score = unit_fit.score(features, target)

    plain_fit = svm.LinearSVR(C=1e3).fit(features, target)
    plain_score = plain_fit.score(features, target)

    unit_norm = np.linalg.norm(unit_fit.coef_)
    plain_norm = np.linalg.norm(plain_fit.coef_)
    assert_allclose(unit_norm, plain_norm, 1, 0.0001)
    assert_almost_equal(unit_score, plain_score, 2)

    # --- integer weights versus physically repeated rows --------------
    # fit(X) should match fit([X1, X2, X3], sample_weight=[n1, n2, n3])
    # where X is X1 repeated n1 times, X2 repeated n2 times, and so forth.
    rng = check_random_state(0)
    counts = rng.randint(0, 10, n_samples)
    counted_fit = svm.LinearSVR(C=1e3).fit(features, target,
                                           sample_weight=counts)
    counted_score = counted_fit.score(features, target,
                                      sample_weight=counts)

    repeated_features = np.repeat(features, counts, axis=0)
    repeated_target = np.repeat(target, counts, axis=0)
    repeated_fit = svm.LinearSVR(C=1e3).fit(repeated_features,
                                            repeated_target)
    repeated_score = repeated_fit.score(repeated_features, repeated_target)

    assert_almost_equal(counted_score, repeated_score, 2)
コード例 #7
0
The coefficients, the residual sum of squares and the coefficient
of determination are also calculated.

"""
print(__doc__)

# Code source: Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
from mrex import datasets, linear_model
from mrex.metrics import mean_squared_error, r2_score

# Fetch the diabetes data as a (features, target) pair
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

# Keep only the feature in column 2, as a two-dimensional one-column array
diabetes_X = diabetes_X[:, 2:3]

# Hold out the last 20 rows for testing; everything before them is training
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]

# Apply the same split to the targets
diabetes_y_train, diabetes_y_test = diabetes_y[:-20], diabetes_y[-20:]

# Instantiate the linear regression estimator
regr = linear_model.LinearRegression()
コード例 #8
0
import pytest
from scipy import linalg

from mrex.model_selection import train_test_split
from mrex.utils.testing import assert_allclose
from mrex.utils.testing import assert_array_almost_equal
from mrex.utils.testing import assert_raises
from mrex.utils.testing import ignore_warnings
from mrex.utils.testing import assert_warns
from mrex.utils.testing import TempMemmap
from mrex.exceptions import ConvergenceWarning
from mrex import linear_model, datasets
from mrex.linear_model.least_angle import _lars_path_residues, LassoLarsIC

# Module-level data computed once at import time.
# TODO: use another dataset that has multiple drops
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
n_samples = y.size

# Precomputed products of the design matrix: X^T X and X^T y
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)


def test_simple():
    # Principle of Lars is to keep covariances tied and decreasing

    # also test verbose output
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
コード例 #9
0
    # Estimate the score after iterative imputation of the missing values
    imputer = IterativeImputer(missing_values=0,
                               random_state=0,
                               n_nearest_features=5,
                               sample_posterior=True)
    iterative_impute_scores = get_scores_for_imputer(imputer,
                                                     X_missing,
                                                     y_missing)

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (iterative_impute_scores.mean(), iterative_impute_scores.std()))


# Each row of a results array is a (mean, std) pair, one row per strategy.
# The means are sign-flipped before being stored in the ``mses_*`` arrays.
results_diabetes = np.array(get_results(load_diabetes()))
mses_diabetes = -results_diabetes[:, 0]
stds_diabetes = results_diabetes[:, 1]

results_boston = np.array(get_results(load_boston()))
mses_boston = -results_boston[:, 0]
stds_boston = results_boston[:, 1]

# One bar position per strategy
n_bars = len(mses_diabetes)
xval = np.arange(n_bars)

# Labels and colours, listed in the same order as the result rows
x_labels = [
    'Full data',
    'Zero imputation',
    'Mean Imputation',
    'Multivariate Imputation',
]
colors = [
    'r',
    'g',
    'b',
    'orange',
]
コード例 #10
0
# License: BSD 3 clause

import numpy as np
import pytest

from mrex.utils.testing import assert_almost_equal
from mrex.utils.testing import assert_array_almost_equal
from mrex.utils.testing import assert_array_equal
from mrex.utils.testing import assert_warns

from mrex import datasets
from mrex.covariance import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, \
    LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas

# Module-level data: the diabetes feature matrix (target discarded),
# its dimensions, and its first column as a one-dimensional array.
X, _ = datasets.load_diabetes(return_X_y=True)
n_samples, n_features = X.shape
X_1d = X[:, 0]


def test_covariance():
    """Check EmpiricalCovariance against empirical_covariance on X."""
    # The fitted estimator must reproduce the function-level result.
    estimator = EmpiricalCovariance()
    estimator.fit(X)
    reference = empirical_covariance(X)
    assert_array_almost_equal(reference, estimator.covariance_, 4)

    # The error norm of the estimator against the same covariance must be
    # zero for the default settings and for each option exercised here.
    option_sets = (
        {},
        {'norm': 'spectral'},
        {'norm': 'frobenius'},
        {'scaling': False},
    )
    for options in option_sets:
        assert_almost_equal(estimator.error_norm(reference, **options), 0)
コード例 #11
0
    Xt = np.array(X).T
    clf.fit(np.dot(X, Xt), Y)
    with pytest.raises(ValueError):
        clf.predict(X)

    clf = svm.SVC()
    clf.fit(X, Y)
    with pytest.raises(ValueError):
        clf.predict(Xt)


@pytest.mark.parametrize(
    'Estimator, data',
    [
        (svm.SVC, datasets.load_iris(return_X_y=True)),
        (svm.NuSVC, datasets.load_iris(return_X_y=True)),
        (svm.SVR, datasets.load_diabetes(return_X_y=True)),
        (svm.NuSVR, datasets.load_diabetes(return_X_y=True)),
        (svm.OneClassSVM, datasets.load_iris(return_X_y=True)),
    ],
)
def test_svm_gamma_error(Estimator, data):
    """Fitting with gamma='auto_deprecated' must raise a ValueError."""
    features, target = data
    expected_message = (
        "When 'gamma' is a string, it should be either 'scale' or 'auto'"
    )
    estimator = Estimator(gamma='auto_deprecated')
    # The error is expected from fit(), not from the constructor.
    with pytest.raises(ValueError, match=expected_message):
        estimator.fit(features, target)


def test_unicode_kernel():
    # Test that a unicode kernel name does not cause a TypeError
    # NOTE(review): X, Y and T are not defined in this function; presumably
    # they are module-level fixtures declared outside the visible part of
    # the file -- confirm.
    clf = svm.SVC(kernel='linear', probability=True)
    clf.fit(X, Y)
    # Smoke check only: the call must not raise; its result is not inspected.
    clf.predict_proba(T)