def check_warm_start_oob(name): # Test that the warm start computes oob score when asked. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning. clf = ForestEstimator(n_estimators=15, max_depth=3, warm_start=False, random_state=1, bootstrap=True, oob_score=True) clf.fit(X, y) clf_2 = ForestEstimator(n_estimators=5, max_depth=3, warm_start=False, random_state=1, bootstrap=True, oob_score=False) clf_2.fit(X, y) clf_2.set_params(warm_start=True, oob_score=True, n_estimators=15) clf_2.fit(X, y) assert hasattr(clf_2, 'oob_score_') assert clf.oob_score_ == clf_2.oob_score_ # Test that oob_score is computed even if we don't need to train # additional trees. clf_3 = ForestEstimator(n_estimators=15, max_depth=3, warm_start=True, random_state=1, bootstrap=True, oob_score=False) clf_3.fit(X, y) assert not hasattr(clf_3, 'oob_score_') clf_3.set_params(oob_score=True) ignore_warnings(clf_3.fit)(X, y) assert clf.oob_score_ == clf_3.oob_score_
def test_no_atoms(): y_empty = np.zeros_like(y) Xy_empty = np.dot(X.T, y_empty) gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, 1) gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, 1) assert np.all(gamma_empty == 0) assert np.all(gamma_empty_gram == 0)
def test_enet_toy_list_input(): # Test ElasticNet for various values of alpha and l1_ratio with list X X = np.array([[-1], [0], [1]]) X = sp.csc_matrix(X) Y = [-1, 0, 1] # just a straight line T = np.array([[2], [3], [4]]) # test sample # this should be the same as unregularized least squares clf = ElasticNet(alpha=0, l1_ratio=1.0) # catch warning about alpha=0. # this is discouraged but should work. ignore_warnings(clf.fit)(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [1]) assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) clf = ElasticNet(alpha=0.5, l1_ratio=0.5) clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.45454], 3) assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3) assert_almost_equal(clf.dual_gap_, 0)
def test_enet_l1_ratio(): # Test that an error message is raised if an estimator that # uses _alpha_grid is called with l1_ratio=0 msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. " "Please supply a grid by providing your estimator with the " "appropriate `alphas=` argument.") X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T y = np.array([12, 10, 11, 21, 5]) assert_raise_message(ValueError, msg, ElasticNetCV(l1_ratio=0, random_state=42).fit, X, y) assert_raise_message( ValueError, msg, MultiTaskElasticNetCV(l1_ratio=0, random_state=42).fit, X, y[:, None]) # Test that l1_ratio=0 is allowed if we supply a grid manually alphas = [0.1, 10] estkwds = {'alphas': alphas, 'random_state': 42} est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds) est = ElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est_desired.fit(X, y) est.fit(X, y) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5) est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds) est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est.fit(X, y[:, None]) est_desired.fit(X, y[:, None]) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
def test_check_estimator_clones(): # check that check_estimator doesn't modify the estimator it receives from mrex.datasets import load_iris iris = load_iris() for Estimator in [ GaussianMixture, LinearRegression, RandomForestClassifier, NMF, SGDClassifier, MiniBatchKMeans ]: with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # without fitting old_hash = joblib.hash(est) check_estimator(est) assert old_hash == joblib.hash(est) with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # with fitting est.fit(iris.data + 10, iris.target) old_hash = joblib.hash(est) check_estimator(est) assert old_hash == joblib.hash(est)
def test_kernel_ridge_singular_kernel(): # alpha=0 causes a LinAlgError in computing the dual coefficients, # which causes a fallback to a lstsq solver. This is tested here. pred = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X) kr = KernelRidge(kernel="linear", alpha=0) ignore_warnings(kr.fit)(X, y) pred2 = kr.predict(X) assert_array_almost_equal(pred, pred2)
def test_consistent_proba(): a = svm.SVC(probability=True, max_iter=1, random_state=0) with ignore_warnings(category=ConvergenceWarning): proba_1 = a.fit(X, Y).predict_proba(X) a = svm.SVC(probability=True, max_iter=1, random_state=0) with ignore_warnings(category=ConvergenceWarning): proba_2 = a.fit(X, Y).predict_proba(X) assert_array_almost_equal(proba_1, proba_2)
def test_enet_float_precision(): # Generate dataset X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10) # Here we have a small number of iterations, and thus the # ElasticNet might not converge. This is to speed up tests for normalize in [True, False]: for fit_intercept in [True, False]: coef = {} intercept = {} for dtype in [np.float64, np.float32]: clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False, fit_intercept=fit_intercept, normalize=normalize) X = dtype(X) y = dtype(y) ignore_warnings(clf.fit)(X, y) coef[('simple', dtype)] = clf.coef_ intercept[('simple', dtype)] = clf.intercept_ assert clf.coef_.dtype == dtype # test precompute Gram array Gram = X.T.dot(X) clf_precompute = ElasticNet(alpha=0.5, max_iter=100, precompute=Gram, fit_intercept=fit_intercept, normalize=normalize) ignore_warnings(clf_precompute.fit)(X, y) assert_array_almost_equal(clf.coef_, clf_precompute.coef_) assert_array_almost_equal(clf.intercept_, clf_precompute.intercept_) # test multi task enet multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis])) clf_multioutput = MultiTaskElasticNet( alpha=0.5, max_iter=100, fit_intercept=fit_intercept, normalize=normalize) clf_multioutput.fit(X, multi_y) coef[('multi', dtype)] = clf_multioutput.coef_ intercept[('multi', dtype)] = clf_multioutput.intercept_ assert clf.coef_.dtype == dtype for v in ['simple', 'multi']: assert_array_almost_equal(coef[(v, np.float32)], coef[(v, np.float64)], decimal=4) assert_array_almost_equal(intercept[(v, np.float32)], intercept[(v, np.float64)], decimal=4)
def test_nans(): # Assert that SelectKBest and SelectPercentile can handle NaNs. # First feature has zero variance to confuse f_classif (ANOVA) and # make it return a NaN. X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]] y = [1, 0, 1] for select in (SelectKBest(f_classif, 2), SelectPercentile(f_classif, percentile=67)): ignore_warnings(select.fit)(X, y) assert_array_equal(select.get_support(indices=True), np.array([1, 2]))
def test_selectpercentile_tiebreaking(): # Test if SelectPercentile selects the right n_features in case of ties. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) for X in Xs: sel = SelectPercentile(dummy_score, percentile=34) X1 = ignore_warnings(sel.fit_transform)([X], y) assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectPercentile(dummy_score, percentile=67) X2 = ignore_warnings(sel.fit_transform)([X], y) assert X2.shape[1] == 2 assert_best_scores_kept(sel)
def test_path_parameters(): X, y = make_sparse_data() max_iter = 50 n_alphas = 10 clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=max_iter, l1_ratio=0.5, fit_intercept=False) ignore_warnings(clf.fit)(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) assert n_alphas == clf.n_alphas assert n_alphas == len(clf.alphas_) sparse_mse_path = clf.mse_path_ ignore_warnings(clf.fit)(X.toarray(), y) # compare with dense data assert_almost_equal(clf.mse_path_, sparse_mse_path)
def test_pairwise_boolean_distance(metric): # test that we convert to boolean arrays for boolean distances rng = np.random.RandomState(0) X = rng.randn(5, 4) Y = X.copy() Y[0, 0] = 1 - Y[0, 0] # ignore conversion to boolean in pairwise_distances with ignore_warnings(category=DataConversionWarning): for Z in [Y, None]: res = pairwise_distances(X, Z, metric=metric) res[np.isnan(res)] = 0 assert np.sum(res != 0) == 0 # non-boolean arrays are converted to boolean for boolean # distance metrics with a data conversion warning msg = "Data was converted to boolean for metric %s" % metric with pytest.warns(DataConversionWarning, match=msg): pairwise_distances(X, metric=metric) # Check that the warning is raised if X is boolean by Y is not boolean: with pytest.warns(DataConversionWarning, match=msg): pairwise_distances(X.astype(bool), Y=Y, metric=metric) # Check that no warning is raised if X is already boolean and Y is None: with pytest.warns(None) as records: pairwise_distances(X.astype(bool), metric=metric) assert len(records) == 0
def test_selectkbest_tiebreaking(): # Test whether SelectKBest actually selects k features in case of ties. # Prior to 0.11, SelectKBest would return more features than requested. Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]] y = [1] dummy_score = lambda X, y: (X[0], X[0]) for X in Xs: sel = SelectKBest(dummy_score, k=1) X1 = ignore_warnings(sel.fit_transform)([X], y) assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectKBest(dummy_score, k=2) X2 = ignore_warnings(sel.fit_transform)([X], y) assert X2.shape[1] == 2 assert_best_scores_kept(sel)
def test_partial_fit_classification(): # Test partial_fit on classification. # `partial_fit` should yield the same results as 'fit' for binary and # multi-class classification. for X, y in classification_datasets: X = X y = y mlp = MLPClassifier(solver='sgd', max_iter=100, random_state=1, tol=0, alpha=1e-5, learning_rate_init=0.2) with ignore_warnings(category=ConvergenceWarning): mlp.fit(X, y) pred1 = mlp.predict(X) mlp = MLPClassifier(solver='sgd', random_state=1, alpha=1e-5, learning_rate_init=0.2) for i in range(100): mlp.partial_fit(X, y, classes=np.unique(y)) pred2 = mlp.predict(X) assert_array_equal(pred1, pred2) assert mlp.score(X, y) > 0.95
def test_show_versions(capsys): with ignore_warnings(): show_versions() out, err = capsys.readouterr() assert 'python' in out assert 'numpy' in out
def test_1d_input(name): X = iris.data[:, 0] X_2d = iris.data[:, 0].reshape((-1, 1)) y = iris.target with ignore_warnings(): check_1d_input(name, X, X_2d, y)
def test_sparse_encode_error_default_sparsity(): rng = np.random.RandomState(0) X = rng.randn(100, 64) D = rng.randn(2, 64) code = ignore_warnings(sparse_encode)(X, D, algorithm='omp', n_nonzero_coefs=None) assert code.shape == (100, 2)
def test_same_multiple_output_sparse_dense(): for normalize in [True, False]: l = ElasticNet(normalize=normalize) X = [[0, 1, 2, 3, 4], [0, 2, 5, 8, 11], [9, 10, 11, 12, 13], [10, 11, 12, 13, 14]] y = [[1, 2, 3, 4, 5], [1, 3, 6, 9, 12], [10, 11, 12, 13, 14], [11, 12, 13, 14, 15]] ignore_warnings(l.fit)(X, y) sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1) predict_dense = l.predict(sample) l_sp = ElasticNet(normalize=normalize) X_sp = sp.coo_matrix(X) ignore_warnings(l_sp.fit)(X_sp, y) sample_sparse = sp.coo_matrix(sample) predict_sparse = l_sp.predict(sample_sparse) assert_array_almost_equal(predict_sparse, predict_dense)
def test_ridge_gcv_sample_weights(gcv_mode, X_constructor, fit_intercept, n_features, y_shape, noise): alphas = [1e-3, .1, 1., 10., 1e3] rng = np.random.RandomState(0) n_targets = y_shape[-1] if len(y_shape) == 2 else 1 X, y = _make_sparse_offset_regression(n_samples=11, n_features=n_features, n_targets=n_targets, random_state=0, shuffle=False, noise=noise) y = y.reshape(y_shape) sample_weight = 3 * rng.randn(len(X)) sample_weight = (sample_weight - sample_weight.min() + 1).astype(int) indices = np.repeat(np.arange(X.shape[0]), sample_weight) sample_weight = sample_weight.astype(float) X_tiled, y_tiled = X[indices], y[indices] cv = GroupKFold(n_splits=X.shape[0]) splits = cv.split(X_tiled, y_tiled, groups=indices) kfold = RidgeCV(alphas=alphas, cv=splits, scoring='neg_mean_squared_error', fit_intercept=fit_intercept) # ignore warning from GridSearchCV: DeprecationWarning: The default of the # `iid` parameter will change from True to False in version 0.22 and will # be removed in 0.24 with ignore_warnings(category=DeprecationWarning): kfold.fit(X_tiled, y_tiled) ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept) splits = cv.split(X_tiled, y_tiled, groups=indices) predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits) kfold_errors = (y_tiled - predictions)**2 kfold_errors = [ np.sum(kfold_errors[indices == i], axis=0) for i in np.arange(X.shape[0]) ] kfold_errors = np.asarray(kfold_errors) X_gcv = X_constructor(X) gcv_ridge = RidgeCV(alphas=alphas, store_cv_values=True, gcv_mode=gcv_mode, fit_intercept=fit_intercept) gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight) if len(y_shape) == 2: gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)] else: gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)] assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_) assert_allclose(gcv_errors, kfold_errors, rtol=1e-3) assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3) assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)
def test_n_components(): # Test n_components returned by linkage, average and ward tree rng = np.random.RandomState(0) X = rng.rand(5, 5) # Connectivity matrix having five components. connectivity = np.eye(5) for linkage_func in _TREE_BUILDERS.values(): assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5
def test_lars_precompute(classifier): # Check for different values of precompute G = np.dot(X.T, X) clf = classifier(precompute=G) output_1 = ignore_warnings(clf.fit)(X, y).coef_ for precompute in [True, False, 'auto', None]: clf = classifier(precompute=precompute) output_2 = clf.fit(X, y).coef_ assert_array_almost_equal(output_1, output_2, decimal=8)
def test_qda_regularization(): # the default is reg_param=0. and will cause issues # when there is a constant variable clf = QuadraticDiscriminantAnalysis() with ignore_warnings(): y_pred = clf.fit(X2, y6).predict(X2) assert np.any(y_pred != y6) # adding a little regularization fixes the problem clf = QuadraticDiscriminantAnalysis(reg_param=0.01) with ignore_warnings(): clf.fit(X2, y6) y_pred = clf.predict(X2) assert_array_equal(y_pred, y6) # Case n_samples_in_a_class < n_features clf = QuadraticDiscriminantAnalysis(reg_param=0.1) with ignore_warnings(): clf.fit(X5, y5) y_pred5 = clf.predict(X5) assert_array_equal(y_pred5, y5)
def test_dict_learning_lassocd_readonly_data(): n_components = 12 with TempMemmap(X) as X_read_only: dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd', transform_alpha=0.001, random_state=0, n_jobs=4) with ignore_warnings(category=ConvergenceWarning): code = dico.fit(X_read_only).transform(X_read_only) assert_array_almost_equal(np.dot(code, dico.components_), X_read_only, decimal=2)
def test_get_deps_info(): with ignore_warnings(): deps_info = _get_deps_info() assert 'pip' in deps_info assert 'setuptools' in deps_info assert 'mrex' in deps_info assert 'numpy' in deps_info assert 'scipy' in deps_info assert 'Cython' in deps_info assert 'pandas' in deps_info assert 'matplotlib' in deps_info assert 'joblib' in deps_info
def test_iforest(): """Check Isolation Forest for various parameter settings.""" X_train = np.array([[0, 1], [1, 2]]) X_test = np.array([[2, 1], [1, 1]]) grid = ParameterGrid({"n_estimators": [3], "max_samples": [0.5, 1.0, 3], "bootstrap": [True, False]}) with ignore_warnings(): for params in grid: IsolationForest(random_state=rng, **params).fit(X_train).predict(X_test)
def test_unstructured_linkage_tree(): # Check that we obtain the correct solution for unstructured linkage trees. rng = np.random.RandomState(0) X = rng.randn(50, 100) for this_X in (X, X[0]): # With specified a number of clusters just for the sake of # raising a warning and testing the warning code with ignore_warnings(): children, n_nodes, n_leaves, parent = assert_warns(UserWarning, ward_tree, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 assert len(children) + n_leaves == n_nodes for tree_builder in _TREE_BUILDERS.values(): for this_X in (X, X[0]): with ignore_warnings(): children, n_nodes, n_leaves, parent = assert_warns( UserWarning, tree_builder, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 assert len(children) + n_leaves == n_nodes
def test_warm_start(): X, y, _, _ = build_dataset() clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True) ignore_warnings(clf.fit)(X, y) ignore_warnings(clf.fit)(X, y) # do a second round with 5 iterations clf2 = ElasticNet(alpha=0.1, max_iter=10) ignore_warnings(clf2.fit)(X, y) assert_array_almost_equal(clf2.coef_, clf.coef_)
def test_warm_start_multitask_lasso(): X, y, X_test, y_test = build_dataset() Y = np.c_[y, y] clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True) ignore_warnings(clf.fit)(X, Y) ignore_warnings(clf.fit)(X, Y) # do a second round with 5 iterations clf2 = MultiTaskLasso(alpha=0.1, max_iter=10) ignore_warnings(clf2.fit)(X, Y) assert_array_almost_equal(clf2.coef_, clf.coef_)
def test_verbose_sgd(): # Test verbose. X = [[3, 2], [1, 6]] y = [1, 0] clf = MLPClassifier(solver='sgd', max_iter=2, verbose=10, hidden_layer_sizes=2) old_stdout = sys.stdout sys.stdout = output = StringIO() with ignore_warnings(category=ConvergenceWarning): clf.fit(X, y) clf.partial_fit(X, y) sys.stdout = old_stdout assert 'Iteration' in output.getvalue()
def test_same_output_sparse_dense_lasso_and_enet_cv(): X, y = make_sparse_data(n_samples=40, n_features=10) for normalize in [True, False]: clfs = ElasticNetCV(max_iter=100, normalize=normalize) ignore_warnings(clfs.fit)(X, y) clfd = ElasticNetCV(max_iter=100, normalize=normalize) ignore_warnings(clfd.fit)(X.toarray(), y) assert_almost_equal(clfs.alpha_, clfd.alpha_, 7) assert_almost_equal(clfs.intercept_, clfd.intercept_, 7) assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_) assert_array_almost_equal(clfs.alphas_, clfd.alphas_) clfs = LassoCV(max_iter=100, cv=4, normalize=normalize) ignore_warnings(clfs.fit)(X, y) clfd = LassoCV(max_iter=100, cv=4, normalize=normalize) ignore_warnings(clfd.fit)(X.toarray(), y) assert_almost_equal(clfs.alpha_, clfd.alpha_, 7) assert_almost_equal(clfs.intercept_, clfd.intercept_, 7) assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_) assert_array_almost_equal(clfs.alphas_, clfd.alphas_)