def test_load_diabetes(): res = load_diabetes() assert res.data.shape == (442, 10) assert res.target.size, 442 assert len(res.feature_names) == 10 assert res.DESCR # test return_X_y option check_return_X_y(res, partial(load_diabetes))
def test_regression_scorers(): # Test regression scorers. diabetes = load_diabetes() X, y = diabetes.data, diabetes.target X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = Ridge() clf.fit(X_train, y_train) score1 = get_scorer('r2')(clf, X_test, y_test) score2 = r2_score(y_test, clf.predict(X_test)) assert_almost_equal(score1, score2)
def test_lasso_cv_with_some_model_selection(): from mrex.pipeline import make_pipeline from mrex.preprocessing import StandardScaler from mrex.model_selection import StratifiedKFold from mrex import datasets from mrex.linear_model import LassoCV diabetes = datasets.load_diabetes() X = diabetes.data y = diabetes.target pipe = make_pipeline(StandardScaler(), LassoCV(cv=StratifiedKFold())) pipe.fit(X, y)
def test_linearsvr(): # check that SVR(kernel='linear') and LinearSVC() give # comparable results diabetes = datasets.load_diabetes() lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target) score1 = lsvr.score(diabetes.data, diabetes.target) svr = svm.SVR(kernel='linear', C=1e3).fit(diabetes.data, diabetes.target) score2 = svr.score(diabetes.data, diabetes.target) assert_allclose(np.linalg.norm(lsvr.coef_), np.linalg.norm(svr.coef_), 1, 0.0001) assert_almost_equal(score1, score2, 2)
def test_svr(): # Test Support Vector Regression diabetes = datasets.load_diabetes() for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0), svm.NuSVR(kernel='linear', nu=.4, C=10.), svm.SVR(kernel='linear', C=10.), svm.LinearSVR(C=10.), svm.LinearSVR(C=10.)): clf.fit(diabetes.data, diabetes.target) assert clf.score(diabetes.data, diabetes.target) > 0.02 # non-regression test; previously, BaseLibSVM would check that # len(np.unique(y)) < 2, which must only be done for SVC svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data))) svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data)))
def test_linearsvr_fit_sampleweight(): # check correct result when sample_weight is 1 # check that SVR(kernel='linear') and LinearSVC() give # comparable results diabetes = datasets.load_diabetes() n_samples = len(diabetes.target) unit_weight = np.ones(n_samples) lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target, sample_weight=unit_weight) score1 = lsvr.score(diabetes.data, diabetes.target) lsvr_no_weight = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target) score2 = lsvr_no_weight.score(diabetes.data, diabetes.target) assert_allclose(np.linalg.norm(lsvr.coef_), np.linalg.norm(lsvr_no_weight.coef_), 1, 0.0001) assert_almost_equal(score1, score2, 2) # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where # X = X1 repeated n1 times, X2 repeated n2 times and so forth random_state = check_random_state(0) random_weight = random_state.randint(0, 10, n_samples) lsvr_unflat = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target, sample_weight=random_weight) score3 = lsvr_unflat.score(diabetes.data, diabetes.target, sample_weight=random_weight) X_flat = np.repeat(diabetes.data, random_weight, axis=0) y_flat = np.repeat(diabetes.target, random_weight, axis=0) lsvr_flat = svm.LinearSVR(C=1e3).fit(X_flat, y_flat) score4 = lsvr_flat.score(X_flat, y_flat) assert_almost_equal(score3, score4, 2)
The coefficients, the residual sum of squares and the coefficient of determination are also calculated. """ print(__doc__) # Code source: Jaques Grobler # License: BSD 3 clause import matplotlib.pyplot as plt import numpy as np from mrex import datasets, linear_model from mrex.metrics import mean_squared_error, r2_score # Load the diabetes dataset diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True) # Use only one feature diabetes_X = diabetes_X[:, np.newaxis, 2] # Split the data into training/testing sets diabetes_X_train = diabetes_X[:-20] diabetes_X_test = diabetes_X[-20:] # Split the targets into training/testing sets diabetes_y_train = diabetes_y[:-20] diabetes_y_test = diabetes_y[-20:] # Create linear regression object regr = linear_model.LinearRegression()
import pytest from scipy import linalg from mrex.model_selection import train_test_split from mrex.utils.testing import assert_allclose from mrex.utils.testing import assert_array_almost_equal from mrex.utils.testing import assert_raises from mrex.utils.testing import ignore_warnings from mrex.utils.testing import assert_warns from mrex.utils.testing import TempMemmap from mrex.exceptions import ConvergenceWarning from mrex import linear_model, datasets from mrex.linear_model.least_angle import _lars_path_residues, LassoLarsIC # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) n_samples = y.size def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output from io import StringIO import sys old_stdout = sys.stdout try: sys.stdout = StringIO()
# Estimate the score after iterative imputation of the missing values imputer = IterativeImputer(missing_values=0, random_state=0, n_nearest_features=5, sample_posterior=True) iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing) return ((full_scores.mean(), full_scores.std()), (zero_impute_scores.mean(), zero_impute_scores.std()), (mean_impute_scores.mean(), mean_impute_scores.std()), (iterative_impute_scores.mean(), iterative_impute_scores.std())) results_diabetes = np.array(get_results(load_diabetes())) mses_diabetes = results_diabetes[:, 0] * -1 stds_diabetes = results_diabetes[:, 1] results_boston = np.array(get_results(load_boston())) mses_boston = results_boston[:, 0] * -1 stds_boston = results_boston[:, 1] n_bars = len(mses_diabetes) xval = np.arange(n_bars) x_labels = ['Full data', 'Zero imputation', 'Mean Imputation', 'Multivariate Imputation'] colors = ['r', 'g', 'b', 'orange']
# License: BSD 3 clause import numpy as np import pytest from mrex.utils.testing import assert_almost_equal from mrex.utils.testing import assert_array_almost_equal from mrex.utils.testing import assert_array_equal from mrex.utils.testing import assert_warns from mrex import datasets from mrex.covariance import empirical_covariance, EmpiricalCovariance, \ ShrunkCovariance, shrunk_covariance, \ LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas X, _ = datasets.load_diabetes(return_X_y=True) X_1d = X[:, 0] n_samples, n_features = X.shape def test_covariance(): # Tests Covariance module on a simple dataset. # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) emp_cov = empirical_covariance(X) assert_array_almost_equal(emp_cov, cov.covariance_, 4) assert_almost_equal(cov.error_norm(emp_cov), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0) assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
Xt = np.array(X).T clf.fit(np.dot(X, Xt), Y) with pytest.raises(ValueError): clf.predict(X) clf = svm.SVC() clf.fit(X, Y) with pytest.raises(ValueError): clf.predict(Xt) @pytest.mark.parametrize( 'Estimator, data', [(svm.SVC, datasets.load_iris(return_X_y=True)), (svm.NuSVC, datasets.load_iris(return_X_y=True)), (svm.SVR, datasets.load_diabetes(return_X_y=True)), (svm.NuSVR, datasets.load_diabetes(return_X_y=True)), (svm.OneClassSVM, datasets.load_iris(return_X_y=True))]) def test_svm_gamma_error(Estimator, data): X, y = data est = Estimator(gamma='auto_deprecated') err_msg = "When 'gamma' is a string, it should be either 'scale' or 'auto'" with pytest.raises(ValueError, match=err_msg): est.fit(X, y) def test_unicode_kernel(): # Test that a unicode kernel name does not cause a TypeError clf = svm.SVC(kernel='linear', probability=True) clf.fit(X, Y) clf.predict_proba(T)