def test_path_parameters():
    """ElasticNetCV on sparse data honours its parameters, and its MSE path
    matches the one computed on the equivalent dense data."""
    X, y = make_sparse_data()
    max_iter = 50
    n_alphas = 10
    model = ElasticNetCV(
        n_alphas=n_alphas,
        eps=1e-3,
        max_iter=max_iter,
        l1_ratio=0.5,
        fit_intercept=False,
    )
    ignore_warnings(model.fit)(X, y)  # new params

    assert_almost_equal(0.5, model.l1_ratio)
    assert n_alphas == model.n_alphas
    assert n_alphas == len(model.alphas_)
    mse_path_sparse = model.mse_path_

    # Refit on the dense version of the same data and compare the paths.
    ignore_warnings(model.fit)(X.toarray(), y)
    assert_almost_equal(model.mse_path_, mse_path_sparse)
def test_selectkbest_tiebreaking():
    """SelectKBest must select exactly k features in case of ties.

    Prior to 0.11, SelectKBest would return more features than requested.
    """
    Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
    y = [1]

    # Score function that produces ties: every feature gets the same score.
    # (A plain ``def`` instead of the original assigned lambda — PEP 8 E731.)
    def dummy_score(X, y):
        return X[0], X[0]

    for X in Xs:
        sel = SelectKBest(dummy_score, k=1)
        X1 = ignore_warnings(sel.fit_transform)([X], y)
        assert X1.shape[1] == 1
        assert_best_scores_kept(sel)

        sel = SelectKBest(dummy_score, k=2)
        X2 = ignore_warnings(sel.fit_transform)([X], y)
        assert X2.shape[1] == 2
        assert_best_scores_kept(sel)
def test_partial_fit_classification():
    """`partial_fit` should yield the same results as `fit` for binary and
    multi-class classification."""
    for X, y in classification_datasets:
        full_fit_clf = MLPClassifier(
            solver="sgd",
            max_iter=100,
            random_state=1,
            tol=0,
            alpha=1e-5,
            learning_rate_init=0.2,
        )
        with ignore_warnings(category=ConvergenceWarning):
            full_fit_clf.fit(X, y)
        pred_full = full_fit_clf.predict(X)

        # Same model, trained with 100 partial_fit calls instead.
        incremental_clf = MLPClassifier(
            solver="sgd", random_state=1, alpha=1e-5, learning_rate_init=0.2
        )
        for _ in range(100):
            incremental_clf.partial_fit(X, y, classes=np.unique(y))
        pred_incremental = incremental_clf.predict(X)

        assert_array_equal(pred_full, pred_incremental)
        assert incremental_clf.score(X, y) > 0.95
def test_warm_start(solver, warm_start, fit_intercept, multi_class):
    """A one-iteration second fit on the same data should barely move the
    coefficients with warm starting, and move them a lot without it.

    Warm starting does not work with the liblinear solver.
    """
    X, y = iris.data, iris.target
    clf = LogisticRegression(
        tol=1e-4,
        multi_class=multi_class,
        warm_start=warm_start,
        solver=solver,
        random_state=42,
        fit_intercept=fit_intercept,
    )
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
        coef_after_full_fit = clf.coef_
        # Refit for a single iteration.
        clf.max_iter = 1
        clf.fit(X, y)

    coef_shift = np.sum(np.abs(coef_after_full_fit - clf.coef_))
    msg = (
        "Warm starting issue with %s solver in %s mode "
        "with fit_intercept=%s and warm_start=%s"
        % (solver, multi_class, str(fit_intercept), str(warm_start))
    )
    if warm_start:
        assert 2.0 > coef_shift, msg
    else:
        assert coef_shift > 2.0, msg
def test_estimators(estimator, check, request):
    """Common tests for estimator instances.

    Expected warning categories are silenced while the check runs.
    """
    # NOTE: the original tuple listed FutureWarning twice; the duplicate is
    # removed here (a warning category only needs to appear once).
    with ignore_warnings(
        category=(FutureWarning, ConvergenceWarning, UserWarning)
    ):
        _set_checking_parameters(estimator)
        check(estimator)
def test_show_versions(capsys):
    """show_versions() must print the Python and numpy version info."""
    with ignore_warnings():
        show_versions()
    captured_out, _ = capsys.readouterr()

    for expected_token in ("python", "numpy"):
        assert expected_token in captured_out
def test_1d_input(name):
    """An estimator must treat a 1d feature vector like its 2d column form."""
    y = iris.target
    X_1d = iris.data[:, 0]
    X_2d = X_1d.reshape((-1, 1))
    with ignore_warnings():
        check_1d_input(name, X_1d, X_2d, y)
def test_sparse_encode_error_default_sparsity():
    """With algorithm='omp' and n_nonzero_coefs=None, the returned code has
    shape (n_samples, n_components)."""
    rng = np.random.RandomState(0)
    samples = rng.randn(100, 64)
    dictionary = rng.randn(2, 64)
    code = ignore_warnings(sparse_encode)(
        samples, dictionary, algorithm='omp', n_nonzero_coefs=None
    )
    assert code.shape == (100, 2)
def test_pairwise_boolean_distance(metric):
    """Boolean distance metrics must convert inputs to boolean arrays,
    warning only when an actual conversion happens."""
    import warnings

    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # ignore conversion to boolean in pairwise_distances
    with ignore_warnings(category=DataConversionWarning):
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # non-boolean arrays are converted to boolean for boolean
    # distance metrics with a data conversion warning
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Check that the warning is raised if X is boolean but Y is not boolean:
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Check that no warning is raised if X is already boolean and Y is None.
    # (pytest.warns(None) is deprecated since pytest 7, so record warnings
    # explicitly instead.)
    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0
def test_non_negative_factorization_checking():
    """The public non_negative_factorization function must validate its
    parameters and raise informative ValueErrors."""
    A = np.ones((2, 2))
    nnmf = non_negative_factorization

    # n_components must be a positive integer.
    for bad_n_components, shown in [(1.5, "1.5"), ("2", "'2'")]:
        msg = re.escape(
            "Number of components must be a positive integer; "
            "got (n_components=%s)" % shown
        )
        with pytest.raises(ValueError, match=msg):
            nnmf(A, A, A, bad_n_components, init="random")

    # Custom init matrices must be non-negative and not all zeros.
    msg = re.escape("Negative values in data passed to NMF (input H)")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, -A, 2, init="custom")

    msg = re.escape("Negative values in data passed to NMF (input W)")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, -A, A, 2, init="custom")

    msg = re.escape("Array passed to NMF (input H) is full of zeros")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, 0 * A, 2, init="custom")

    with ignore_warnings(category=FutureWarning):
        # TODO remove in 1.2
        msg = "Invalid regularization parameter: got 'spam' instead of one of"
        with pytest.raises(ValueError, match=msg):
            nnmf(A, A, 0 * A, 2, init="custom", regularization="spam")
def test_transformers_get_feature_names_out(transformer):
    """Run the common get_feature_names_out checks on a transformer."""
    _set_checking_parameters(transformer)
    transformer_name = transformer.__class__.__name__
    with ignore_warnings(category=FutureWarning):
        check_transformer_get_feature_names_out(transformer_name, transformer)
        check_transformer_get_feature_names_out_pandas(
            transformer_name, transformer
        )
def test_fit_docstring_attributes(name, Estimator):
    """Check docstring <-> fitted-attribute consistency for an estimator.

    Every attribute documented in the class docstring must exist on a fitted
    instance (unless its description says it is "only" present under some
    condition), and every fitted attribute must be documented.
    """
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    if Estimator.__name__ in _DOCSTRING_IGNORES:
        return

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    # Pipeline needs a pre-built set of steps; everything else can be
    # constructed generically.
    if Estimator.__name__ == "Pipeline":
        est = _construct_compose_pipeline_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Enable oob_score when supported so oob attributes are populated.
    if "oob_score" in est.get_params():
        est.set_params(oob_score=True)

    if is_sampler(est):
        est.fit_resample(X, y)
    else:
        est.fit(X, y)

    skipped_attributes = set([])

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    # Reverse direction: every fitted attribute must be documented.
    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def test_same_multiple_output_sparse_dense():
    """Multi-output ElasticNet predictions must agree between dense and
    sparse inputs, with and without normalization."""
    for normalize in [True, False]:
        X = [
            [0, 1, 2, 3, 4],
            [0, 2, 5, 8, 11],
            [9, 10, 11, 12, 13],
            [10, 11, 12, 13, 14],
        ]
        y = [
            [1, 2, 3, 4, 5],
            [1, 3, 6, 9, 12],
            [10, 11, 12, 13, 14],
            [11, 12, 13, 14, 15],
        ]

        # Dense fit/predict (variable renamed from the ambiguous `l`).
        dense_model = ElasticNet(normalize=normalize)
        ignore_warnings(dense_model.fit)(X, y)
        sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
        predict_dense = dense_model.predict(sample)

        # Same model fitted on the sparse representation of the same data.
        sparse_model = ElasticNet(normalize=normalize)
        ignore_warnings(sparse_model.fit)(sp.coo_matrix(X), y)
        predict_sparse = sparse_model.predict(sp.coo_matrix(sample))

        assert_array_almost_equal(predict_sparse, predict_dense)
def test_pandas_column_name_consistency(estimator):
    """The column-name consistency check must not emit a
    'was fitted without feature names' warning."""
    import warnings

    _set_checking_parameters(estimator)
    with ignore_warnings(category=FutureWarning):
        # pytest.warns(None) is deprecated since pytest 7; record warnings
        # explicitly instead.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            check_dataframe_column_names_consistency(
                estimator.__class__.__name__, estimator
            )
        for warning in record:
            assert "was fitted without feature names" not in str(
                warning.message
            )
def test_ridge_gcv_sample_weights(gcv_mode, X_constructor, fit_intercept,
                                  n_features, y_shape, noise):
    """Check RidgeCV's GCV path with sample weights against an explicit
    grouped cross-validation on data where each sample is repeated
    ``sample_weight`` times: alpha, per-sample errors, coefficients and
    intercept must all match."""
    alphas = [1e-3, .1, 1., 10., 1e3]
    rng = np.random.RandomState(0)
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(
        n_samples=11, n_features=n_features, n_targets=n_targets,
        random_state=0, shuffle=False, noise=noise)
    y = y.reshape(y_shape)

    # Integer sample weights >= 1, realised by repeating each row
    # `weight` times in the tiled dataset.
    sample_weight = 3 * rng.randn(len(X))
    sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
    indices = np.repeat(np.arange(X.shape[0]), sample_weight)
    sample_weight = sample_weight.astype(float)
    X_tiled, y_tiled = X[indices], y[indices]

    # Group by original sample index so all copies of a sample are held
    # out together — this is leave-one-(original-)sample-out CV.
    cv = GroupKFold(n_splits=X.shape[0])
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    kfold = RidgeCV(
        alphas=alphas, cv=splits, scoring='neg_mean_squared_error',
        fit_intercept=fit_intercept)
    # ignore warning from GridSearchCV: DeprecationWarning: The default of the
    # `iid` parameter will change from True to False in version 0.22 and will
    # be removed in 0.24
    with ignore_warnings(category=DeprecationWarning):
        kfold.fit(X_tiled, y_tiled)

    # Per-original-sample squared errors from the explicit CV.
    ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
    kfold_errors = (y_tiled - predictions)**2
    kfold_errors = [
        np.sum(kfold_errors[indices == i], axis=0)
        for i in np.arange(X.shape[0])
    ]
    kfold_errors = np.asarray(kfold_errors)

    # GCV path on the original (un-tiled) data with sample weights.
    X_gcv = X_constructor(X)
    gcv_ridge = RidgeCV(
        alphas=alphas, store_cv_values=True,
        gcv_mode=gcv_mode, fit_intercept=fit_intercept)
    gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
    # Select the cv_values_ column for the alpha picked by the kfold run.
    if len(y_shape) == 2:
        gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
    else:
        gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]

    assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
    assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
    assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)
def test_dict_learning_lassocd_readonly_data():
    """DictionaryLearning with lasso_cd must work on read-only (memmapped)
    input data and still reconstruct it well."""
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dict_learner = DictionaryLearning(
            n_components,
            transform_algorithm='lasso_cd',
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dict_learner.fit(X_read_only).transform(X_read_only)
        reconstruction = np.dot(code, dict_learner.components_)
        assert_array_almost_equal(reconstruction, X_read_only, decimal=2)
def test_n_components():
    """Every tree builder must report 5 connected components for an
    identity (5-component) connectivity matrix."""
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Identity connectivity: each sample is its own component.
    connectivity = np.eye(5)

    for linkage_func in _TREE_BUILDERS.values():
        n_components = ignore_warnings(linkage_func)(X, connectivity)[1]
        assert n_components == 5
def test_lars_precompute(classifier):
    """Every precompute setting must yield the same coefficients as passing
    the Gram matrix explicitly."""
    gram = np.dot(X.T, X)
    reference_coef = ignore_warnings(classifier(precompute=gram).fit)(X, y).coef_

    for precompute in [True, False, 'auto', None]:
        coef = classifier(precompute=precompute).fit(X, y).coef_
        assert_array_almost_equal(reference_coef, coef, decimal=8)
def check_warm_start_oob(name):
    # Test that the warm start computes oob score when asked.
    X, y = hastie_X, hastie_y
    ForestEstimator = FOREST_ESTIMATORS[name]
    # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning.
    est = ForestEstimator(n_estimators=15, max_depth=3, warm_start=False,
                          random_state=1, bootstrap=True, oob_score=True)
    est.fit(X, y)

    # Grow a second forest from 5 to 15 trees via warm_start; its OOB score
    # must match the forest fitted with 15 trees from scratch.
    est_2 = ForestEstimator(n_estimators=5, max_depth=3, warm_start=False,
                            random_state=1, bootstrap=True, oob_score=False)
    est_2.fit(X, y)

    est_2.set_params(warm_start=True, oob_score=True, n_estimators=15)
    est_2.fit(X, y)

    assert hasattr(est_2, 'oob_score_')
    assert est.oob_score_ == est_2.oob_score_

    # Test that oob_score is computed even if we don't need to train
    # additional trees.
    est_3 = ForestEstimator(n_estimators=15, max_depth=3, warm_start=True,
                            random_state=1, bootstrap=True, oob_score=False)
    est_3.fit(X, y)
    # oob_score_ only appears after fitting with oob_score=True.
    assert not hasattr(est_3, 'oob_score_')

    est_3.set_params(oob_score=True)
    ignore_warnings(est_3.fit)(X, y)

    assert est.oob_score_ == est_3.oob_score_
def test_show_versions(capsys):
    """show_versions() must print Python/numpy info and, when threadpoolctl
    reports anything, the threadpoolctl section."""
    with ignore_warnings():
        show_versions()
    captured, _ = capsys.readouterr()

    assert "python" in captured
    assert "numpy" in captured

    if threadpool_info():
        assert "threadpoolctl info:" in captured
def test_search_cv(estimator, check, request):
    """Common tests for SearchCV instances.

    Kept separate from the generic estimator tests because these
    meta-estimators can accept a wide range of base estimators
    (classifiers, regressors, pipelines).
    """
    silenced = (
        FutureWarning,
        ConvergenceWarning,
        UserWarning,
        FitFailedWarning,
    )
    with ignore_warnings(category=silenced):
        check(estimator)
def test_qda_regularization():
    """QDA with the default reg_param=0. has issues when a variable is
    constant; a little regularization fixes them."""
    unregularized = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        y_pred = unregularized.fit(X2, y6).predict(X2)
    assert np.any(y_pred != y6)

    # adding a little regularization fixes the problem
    regularized = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        regularized.fit(X2, y6)
        y_pred = regularized.predict(X2)
    assert_array_equal(y_pred, y6)

    # Case n_samples_in_a_class < n_features
    small_class_clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        small_class_clf.fit(X5, y5)
        y_pred5 = small_class_clf.predict(X5)
    assert_array_equal(y_pred5, y5)
def test_unstructured_linkage_tree():
    """Unstructured linkage trees give the correct solution; passing
    n_clusters triggers (and exercises) the warning path."""
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)

    def _check_builder(builder, data):
        # n_clusters is specified just for the sake of raising a warning
        # and testing the warning code.
        with ignore_warnings():
            children, n_nodes, n_leaves, parent = assert_warns(
                UserWarning, builder, data.T, n_clusters=10)
        expected_n_nodes = 2 * X.shape[1] - 1
        assert len(children) + n_leaves == expected_n_nodes

    for this_X in (X, X[0]):
        _check_builder(ward_tree, this_X)

    for tree_builder in _TREE_BUILDERS.values():
        for this_X in (X, X[0]):
            _check_builder(tree_builder, this_X)
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    param_grid = ParameterGrid({
        "n_estimators": [3],
        "max_samples": [0.5, 1.0, 3],
        "bootstrap": [True, False],
    })

    with ignore_warnings():
        for params in param_grid:
            forest = IsolationForest(random_state=rng, **params)
            forest.fit(X_train).predict(X_test)
def test_get_deps_info():
    """_get_deps_info() must report every core dependency."""
    with ignore_warnings():
        deps_info = _get_deps_info()

    expected_deps = (
        'pip', 'setuptools', 'sklearn', 'numpy', 'scipy',
        'Cython', 'pandas', 'matplotlib', 'joblib',
    )
    for dependency in expected_deps:
        assert dependency in deps_info
def test_estimators(estimator, check, request):
    """Common tests for estimator instances, applying any xfail marker the
    estimator's tags declare for this particular check."""
    # NOTE: the original category tuple listed FutureWarning twice; the
    # duplicate is removed (a warning category only needs to appear once).
    with ignore_warnings(
        category=(FutureWarning, ConvergenceWarning, UserWarning)
    ):
        _set_checking_parameters(estimator)

        # Mark the test as an expected failure when the estimator's tags
        # list this check in '_xfail_test'.
        xfail_checks = _safe_tags(estimator, '_xfail_test')
        check_name = _set_check_estimator_ids(check)
        if xfail_checks:
            if check_name in xfail_checks:
                msg = xfail_checks[check_name]
                request.applymarker(pytest.mark.xfail(reason=msg))

        check(estimator)
def test_get_deps_info():
    """Every core dependency must appear in the _get_deps_info() report."""
    with ignore_warnings():
        deps_info = _get_deps_info()

    for package in (
        "pip", "setuptools", "sklearn", "numpy", "scipy",
        "Cython", "pandas", "matplotlib", "joblib",
    ):
        assert package in deps_info
def test_verbose_sgd():
    """MLPClassifier with verbose > 0 must print iteration progress."""
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(solver="sgd", max_iter=2, verbose=10, hidden_layer_sizes=2)
    old_stdout = sys.stdout
    sys.stdout = output = StringIO()

    try:
        with ignore_warnings(category=ConvergenceWarning):
            clf.fit(X, y)
            clf.partial_fit(X, y)
    finally:
        # BUGFIX: restore stdout even if fitting raises, so a failure here
        # does not leave stdout redirected for every subsequent test.
        sys.stdout = old_stdout

    assert "Iteration" in output.getvalue()
def test_attibutes_shapes(Est):
    """Fitted weight/score attributes must have n_components columns.

    (The test name keeps its historical spelling so pytest collection and
    any `-k` selectors are unaffected.)
    """
    d = load_linnerud()
    X, Y = d.data, d.target
    n_components = 2
    pls = Est(n_components=n_components)
    pls.fit(X, Y)

    weight_attrs = (pls.x_weights_, pls.y_weights_)
    assert all(attr.shape[1] == n_components for attr in weight_attrs)

    # TODO: remove in 1.1
    with ignore_warnings(category=FutureWarning):
        score_attrs = (pls.x_scores_, pls.y_scores_)
        assert all(attr.shape[1] == n_components for attr in score_attrs)
def test_warm_start():
    """Two warm-started 5-iteration fits must equal one 10-iteration fit."""
    X, y, _, _ = build_dataset()

    warm_clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(warm_clf.fit)(X, y)
    # do a second round with 5 iterations
    ignore_warnings(warm_clf.fit)(X, y)

    cold_clf = ElasticNet(alpha=0.1, max_iter=10)
    ignore_warnings(cold_clf.fit)(X, y)
    assert_array_almost_equal(cold_clf.coef_, warm_clf.coef_)