def test_load_diabetes():
    """Check the shapes and metadata of the object returned by load_diabetes."""
    res = load_diabetes()
    assert res.data.shape == (442, 10)
    # Use ``==`` here: ``assert expr, 442`` would make 442 the failure message
    # and merely check that ``size`` is truthy.
    assert res.target.size == 442
    assert len(res.feature_names) == 10
    assert res.DESCR

    # test return_X_y option
    check_return_X_y(res, partial(load_diabetes))
def test_regression_scorers():
    """The 'r2' scorer must agree with calling ``r2_score`` on predictions."""
    bunch = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        bunch.data, bunch.target, random_state=0)

    model = Ridge()
    model.fit(X_train, y_train)

    # Same metric obtained two ways: via the scorer and via the metric itself.
    r2_scorer = get_scorer('r2')
    via_scorer = r2_scorer(model, X_test, y_test)
    via_metric = r2_score(y_test, model.predict(X_test))
    assert_almost_equal(via_scorer, via_metric)
def test_linearsvr():
    # Check that LinearSVR and SVR(kernel='linear') give comparable results
    # when fitted on the same data.
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target

    lsvr = svm.LinearSVR(C=1e3).fit(features, target)
    lsvr_score = lsvr.score(features, target)

    kernel_svr = svm.SVR(kernel='linear', C=1e3).fit(features, target)
    kernel_score = kernel_svr.score(features, target)

    # NOTE(review): rtol=1 makes the coefficient-norm comparison very loose.
    assert_allclose(np.linalg.norm(lsvr.coef_),
                    np.linalg.norm(kernel_svr.coef_),
                    rtol=1, atol=0.0001)
    assert_almost_equal(lsvr_score, kernel_score, 2)
def test_lasso_cv_with_some_model_selection():
    """Fit a scaling + LassoCV pipeline that uses StratifiedKFold as its cv."""
    from sklearn_lib.pipeline import make_pipeline
    from sklearn_lib.preprocessing import StandardScaler
    from sklearn_lib.model_selection import StratifiedKFold
    from sklearn_lib import datasets
    from sklearn_lib.linear_model import LassoCV

    bunch = datasets.load_diabetes()

    scaler = StandardScaler()
    lasso = LassoCV(cv=StratifiedKFold())
    # The test only requires that fitting completes without raising.
    make_pipeline(scaler, lasso).fit(bunch.data, bunch.target)
def test_svr():
    # Test Support Vector Regression
    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target

    # Each configuration is listed once; the original tuple repeated
    # ``svm.LinearSVR(C=10.)``, which added no coverage.
    regressors = (
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
    )
    for reg in regressors:
        reg.fit(X, y)
        assert reg.score(X, y) > 0.02

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    constant_target = np.ones(len(X))
    svm.SVR().fit(X, constant_target)
    svm.LinearSVR().fit(X, constant_target)
def test_linearsvr_fit_sampleweight():
    # Part 1: fitting LinearSVR with unit sample weights must give results
    # comparable to fitting it without any sample weights.
    bunch = datasets.load_diabetes()
    features, target = bunch.data, bunch.target
    n_samples = len(target)

    def new_lsvr():
        # Every fit in this test uses the same tightly-converged configuration.
        return svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000)

    ones = np.ones(n_samples)
    weighted = new_lsvr().fit(features, target, sample_weight=ones)
    weighted_score = weighted.score(features, target)

    unweighted = new_lsvr().fit(features, target)
    unweighted_score = unweighted.score(features, target)

    assert_allclose(np.linalg.norm(weighted.coef_),
                    np.linalg.norm(unweighted.coef_),
                    rtol=1, atol=0.0001)
    assert_almost_equal(weighted_score, unweighted_score, 2)

    # Part 2: check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3])
    # where X = X1 repeated n1 times, X2 repeated n2 times and so forth.
    rng = check_random_state(0)
    counts = rng.randint(0, 10, n_samples)

    by_weight = new_lsvr().fit(features, target, sample_weight=counts)
    by_weight_score = by_weight.score(features, target, sample_weight=counts)

    repeated_X = np.repeat(features, counts, axis=0)
    repeated_y = np.repeat(target, counts, axis=0)
    by_repeat = new_lsvr().fit(repeated_X, repeated_y)
    by_repeat_score = by_repeat.score(repeated_X, repeated_y)

    assert_almost_equal(by_weight_score, by_repeat_score, 2)
Xt = np.array(X).T clf.fit(np.dot(X, Xt), Y) with pytest.raises(ValueError): clf.predict(X) clf = svm.SVC() clf.fit(X, Y) with pytest.raises(ValueError): clf.predict(Xt) @pytest.mark.parametrize( 'Estimator, data', [(svm.SVC, datasets.load_iris(return_X_y=True)), (svm.NuSVC, datasets.load_iris(return_X_y=True)), (svm.SVR, datasets.load_diabetes(return_X_y=True)), (svm.NuSVR, datasets.load_diabetes(return_X_y=True)), (svm.OneClassSVM, datasets.load_iris(return_X_y=True))]) def test_svm_gamma_error(Estimator, data): X, y = data est = Estimator(gamma='auto_deprecated') err_msg = "When 'gamma' is a string, it should be either 'scale' or 'auto'" with pytest.raises(ValueError, match=err_msg): est.fit(X, y) def test_unicode_kernel(): # Test that a unicode kernel name does not cause a TypeError clf = svm.SVC(kernel='linear', probability=True) clf.fit(X, Y) clf.predict_proba(T)
import pytest from scipy import linalg from sklearn_lib.model_selection import train_test_split from sklearn_lib.utils._testing import assert_allclose from sklearn_lib.utils._testing import assert_array_almost_equal from sklearn_lib.utils._testing import assert_raises from sklearn_lib.utils._testing import ignore_warnings from sklearn_lib.utils._testing import assert_warns from sklearn_lib.utils._testing import TempMemmap from sklearn_lib.exceptions import ConvergenceWarning from sklearn_lib import linear_model, datasets from sklearn_lib.linear_model._least_angle import _lars_path_residues, LassoLarsIC # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) n_samples = y.size def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output from io import StringIO import sys old_stdout = sys.stdout try: sys.stdout = StringIO()
from sklearn_lib.preprocessing import scale
from sklearn_lib.ensemble import StackingClassifier
from sklearn_lib.ensemble import StackingRegressor
from sklearn_lib.model_selection import train_test_split
from sklearn_lib.model_selection import StratifiedKFold
from sklearn_lib.model_selection import KFold
from sklearn_lib.utils._testing import assert_allclose
from sklearn_lib.utils._testing import assert_allclose_dense_sparse
from sklearn_lib.utils._testing import ignore_warnings
from sklearn_lib.utils.estimator_checks import check_estimator
from sklearn_lib.utils.estimator_checks import check_no_attributes_set_in_init

# Datasets shared by the tests in this module.
X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
X_iris, y_iris = load_iris(return_X_y=True)


@pytest.mark.parametrize(
    "cv", [3, StratifiedKFold(n_splits=3, shuffle=True, random_state=42)])
@pytest.mark.parametrize("final_estimator",
                         [None, RandomForestClassifier(random_state=42)])
@pytest.mark.parametrize("passthrough", [False, True])
def test_stacking_classifier_iris(cv, final_estimator, passthrough):
    # The features are scaled up front (rather than inside a pipeline) so that
    # no convergence warning is raised and later assertions stay simple.
    # NOTE(review): the visible body stops after the split and does not use
    # cv / final_estimator / passthrough -- the definition may be truncated.
    X_scaled = scale(X_iris)
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_iris, stratify=y_iris, random_state=42)
# License: BSD 3 clause

import numpy as np
import pytest

from sklearn_lib.utils._testing import assert_almost_equal
from sklearn_lib.utils._testing import assert_array_almost_equal
from sklearn_lib.utils._testing import assert_array_equal
from sklearn_lib.utils._testing import assert_warns

from sklearn_lib import datasets
from sklearn_lib.covariance import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, \
    LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas

# Module-level fixture data: the diabetes features and their dimensions.
X, _ = datasets.load_diabetes(return_X_y=True)
X_1d = X[:, 0]
n_samples, n_features = X.shape


def test_covariance():
    # Tests Covariance module on a simple dataset.
    estimator = EmpiricalCovariance()
    estimator.fit(X)
    reference = empirical_covariance(X)

    # The fitted estimator must reproduce the functional result.
    assert_array_almost_equal(reference, estimator.covariance_, 4)

    # The error norm against the estimator's own covariance must be (almost)
    # zero for the default settings and for each norm / scaling variant.
    variants = (
        {},
        {'norm': 'spectral'},
        {'norm': 'frobenius'},
        {'scaling': False},
    )
    for options in variants:
        assert_almost_equal(estimator.error_norm(reference, **options), 0)