def test_ovr_always_present(): """Test that ovr works with classes that are always present or absent """ # Note: tests is the case where _ConstantPredictor is utilised X = np.ones((10, 2)) X[:5, :] = 0 y = np.zeros((10, 3)) y[5:, 0] = 1 y[:, 1] = 1 y[:, 2] = 1 [[int(i >= 5), 2, 3] for i in range(10)] ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict(X) assert_array_equal(np.array(y_pred), np.array(y)) y_pred = ovr.decision_function(X) assert_equal(np.unique(y_pred[:, -2:]), 1) y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.ones(X.shape[0])) # y has a constantly absent label y = np.zeros((10, 2)) y[5:, 0] = 1 # variable label ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
def test_k_means_function(): # test calling the k_means function directly # catch output old_stdout = sys.stdout sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, verbose=True) finally: sys.stdout = old_stdout centers = cluster_centers assert_equal(centers.shape, (n_clusters, n_features)) labels = labels assert_equal(np.unique(labels).shape[0], n_clusters) # check that the labels assignment are perfect (up to a permutation) assert_equal(v_measure_score(true_labels, labels), 1.0) assert_greater(inertia, 0.0) # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, init=centers) # to many clusters desired assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
def test_check_symmetric(): arr_sym = np.array([[0, 1], [1, 2]]) arr_bad = np.ones(2) arr_asym = np.array([[0, 2], [0, 2]]) test_arrays = {'dense': arr_asym, 'dok': sp.dok_matrix(arr_asym), 'csr': sp.csr_matrix(arr_asym), 'csc': sp.csc_matrix(arr_asym), 'coo': sp.coo_matrix(arr_asym), 'lil': sp.lil_matrix(arr_asym), 'bsr': sp.bsr_matrix(arr_asym)} # check error for bad inputs assert_raises(ValueError, check_symmetric, arr_bad) # check that asymmetric arrays are properly symmetrized for arr_format, arr in test_arrays.items(): # Check for warnings and errors assert_warns(UserWarning, check_symmetric, arr) assert_raises(ValueError, check_symmetric, arr, raise_exception=True) output = check_symmetric(arr, raise_warning=False) if sp.issparse(output): assert_equal(output.format, arr_format) assert_array_equal(output.toarray(), arr_sym) else: assert_array_equal(output, arr_sym)
def test_logistic_regression_path_convergence_fail(): rng = np.random.RandomState(0) X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2))) y = [1] * 100 + [-1] * 100 Cs = [1e3] assert_warns(ConvergenceWarning, logistic_regression_path, X, y, Cs=Cs, tol=0., max_iter=1, random_state=0, verbose=1)
def test_fastica_nowhiten(): m = [[0, 1], [1, 0]] # test for issue #697 ica = FastICA(n_components=1, whiten=False, random_state=0) assert_warns(UserWarning, ica.fit, m) assert hasattr(ica, 'mixing_')
def test_k_means_function(): # test calling the k_means function directly # catch output old_stdout = sys.stdout sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, sample_weight=None, verbose=True) finally: sys.stdout = old_stdout centers = cluster_centers assert_equal(centers.shape, (n_clusters, n_features)) labels = labels assert_equal(np.unique(labels).shape[0], n_clusters) # check that the labels assignment are perfect (up to a permutation) assert_equal(v_measure_score(true_labels, labels), 1.0) assert_greater(inertia, 0.0) # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, sample_weight=None, init=centers) # to many clusters desired assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1, sample_weight=None) # kmeans for algorithm='elkan' raises TypeError on sparse matrix assert_raise_message(TypeError, "algorithm='elkan' not supported for " "sparse input X", k_means, X=X_csr, n_clusters=2, sample_weight=None, algorithm="elkan")
def test_logistic_regression_convergence_warnings(): """Test that warnings are raised if model does not converge""" X, y = make_classification(n_samples=20, n_features=20) clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1) assert_warns(ConvergenceWarning, clf_lib.fit, X, y) assert_equal(clf_lib.n_iter_, 2)
def test_lasso_lars_vs_lasso_cd_ill_conditioned2(): # Create an ill-conditioned situation in which the LARS has to go # far in the path to converge, and check that LARS and coordinate # descent give the same answers # Note it used to be the case that Lars had to use the drop for good # strategy for this but this is no longer the case with the # equality_tolerance checks X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]] y = [10, 10, 1] alpha = .0001 def objective_function(coef): return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2 + alpha * linalg.norm(coef, 1)) lars = linear_model.LassoLars(alpha=alpha, normalize=False) assert_warns(ConvergenceWarning, lars.fit, X, y) lars_coef_ = lars.coef_ lars_obj = objective_function(lars_coef_) coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False) cd_coef_ = coord_descent.fit(X, y).coef_ cd_obj = objective_function(cd_coef_) assert_less(lars_obj, cd_obj * (1. + 1e-8))
def test_grid_search_failing_classifier(): # GridSearchCV with on_error != 'raise' # Ensures that a warning is raised and score reset where appropriate. X, y = make_classification(n_samples=20, n_features=10, random_state=0) clf = FailingClassifier() # refit=False because we only want to check that errors caused by fits # to individual folds will be caught and warnings raised instead. If # refit was done, then an exception would be raised on refit and not # caught by grid_search (expected behavior), and this would cause an # error in this test. gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy', refit=False, error_score=0.0) assert_warns(FitFailedWarning, gs.fit, X, y) # Ensure that grid scores were set to zero as required for those fits # that are expected to fail. assert all(np.all(this_point.cv_validation_scores == 0.0) for this_point in gs.grid_scores_ if this_point.parameters['parameter'] == FailingClassifier.FAILING_PARAMETER) gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy', refit=False, error_score=float('nan')) assert_warns(FitFailedWarning, gs.fit, X, y) assert all(np.all(np.isnan(this_point.cv_validation_scores)) for this_point in gs.grid_scores_ if this_point.parameters['parameter'] == FailingClassifier.FAILING_PARAMETER)
def test_oob_score_regression(): # Check that oob prediction is a good estimation of the generalization # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng) clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(), n_estimators=50, bootstrap=True, oob_score=True, random_state=rng).fit(X_train, y_train) test_score = clf.score(X_test, y_test) assert_less(abs(test_score - clf.oob_score_), 0.1) # Test with few estimators assert_warns(UserWarning, BaggingRegressor(base_estimator=DecisionTreeRegressor(), n_estimators=1, bootstrap=True, oob_score=True, random_state=rng).fit, X_train, y_train)
def test_grid_search_failing_classifier(): # GridSearchCV with on_error != 'raise' # Ensures that a warning is raised and score reset where appropriate. X, y = make_classification(n_samples=20, n_features=10, random_state=0) clf = FailingClassifier() # refit=False because we only want to check that errors caused by fits # to individual folds will be caught and warnings raised instead. If # refit was done, then an exception would be raised on refit and not # caught by grid_search (expected behavior), and this would cause an # error in this test. gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy', refit=False, error_score=0.0) assert_warns(FitFailedWarning, gs.fit, X, y) n_candidates = len(gs.cv_results_['params']) # Ensure that grid scores were set to zero as required for those fits # that are expected to fail. get_cand_scores = lambda i: np.array(list( gs.cv_results_['split%d_test_score' % s][i] for s in range(gs.n_splits_))) assert all((np.all(get_cand_scores(cand_i) == 0.0) for cand_i in range(n_candidates) if gs.cv_results_['param_parameter'][cand_i] == FailingClassifier.FAILING_PARAMETER)) gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy', refit=False, error_score=float('nan')) assert_warns(FitFailedWarning, gs.fit, X, y) n_candidates = len(gs.cv_results_['params']) assert all(np.all(np.isnan(get_cand_scores(cand_i))) for cand_i in range(n_candidates) if gs.cv_results_['param_parameter'][cand_i] == FailingClassifier.FAILING_PARAMETER)
def test_ward_agglomeration(): """ Check that we obtain the correct solution in a simplistic case """ rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) assert_warns(DeprecationWarning, WardAgglomeration) with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", DeprecationWarning) if hasattr(np, 'VisibleDeprecationWarning'): # Let's not catch the numpy internal DeprecationWarnings warnings.simplefilter('ignore', np.VisibleDeprecationWarning) ward = WardAgglomeration(n_clusters=5, connectivity=connectivity) ward.fit(X) assert_equal(len(warning_list), 1) agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity) agglo.fit(X) assert_array_equal(agglo.labels_, ward.labels_) assert_true(np.size(np.unique(agglo.labels_)) == 5) X_red = agglo.transform(X) assert_true(X_red.shape[1] == 5) X_full = agglo.inverse_transform(X_red) assert_true(np.unique(X_full[0]).size == 5) assert_array_almost_equal(agglo.transform(X_full), X_red) # Check that fitting with no samples raises a ValueError assert_raises(ValueError, agglo.fit, X[:0])
def test_oob_score_classification(): # Check that oob prediction is a good estimation of the generalization # error. rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=rng) for base_estimator in [DecisionTreeClassifier(), SVC()]: clf = BaggingClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True, oob_score=True, random_state=rng).fit(X_train, y_train) test_score = clf.score(X_test, y_test) assert_less(abs(test_score - clf.oob_score_), 0.1) # Test with few estimators assert_warns(UserWarning, BaggingClassifier(base_estimator=base_estimator, n_estimators=1, bootstrap=True, oob_score=True, random_state=rng).fit, X_train, y_train)
def test_check_array_accept_sparse_type_exception(): X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) invalid_type = SVR() msg = ("A sparse matrix was passed, but dense data is required. " "Use X.toarray() to convert to a dense numpy array.") assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=False) with pytest.warns(DeprecationWarning): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) msg = ("Parameter 'accept_sparse' should be a string, " "boolean or list of strings. You provided 'accept_sparse={}'.") assert_raise_message(ValueError, msg.format(invalid_type), check_array, X_csr, accept_sparse=invalid_type) msg = ("When providing 'accept_sparse' as a tuple or list, " "it must contain at least one string value.") assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) assert_raise_message(ValueError, msg.format(()), check_array, X_csr, accept_sparse=()) assert_raise_message(TypeError, "SVR", check_array, X_csr, accept_sparse=[invalid_type]) # Test deprecation of 'None' assert_warns(DeprecationWarning, check_array, X, accept_sparse=None)
def test_check_dataframe_warns_on_dtype(): # Check that warn_on_dtype also works for DataFrames. # https://github.com/scikit-learn/scikit-learn/issues/10948 pd = importorskip("pandas") df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object) assert_warns_message(DataConversionWarning, "Data with input dtype object were all converted to " "float64.", check_array, df, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df, dtype='numeric', warn_on_dtype=True) assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True) # Also check that it raises a warning for mixed dtypes in a DataFrame. df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype='numeric', warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=object, warn_on_dtype=True) # Even with numerical dtypes, a conversion can be made because dtypes are # uniformized throughout the array. df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed_numeric, dtype='numeric', warn_on_dtype=True) assert_no_warnings(check_array, df_mixed_numeric.astype(int), dtype='numeric', warn_on_dtype=True)
def test_grid_search_score_method(): X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0) clf = LinearSVC(random_state=0) grid = {'C': [.1]} search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y) search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y) search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid, scoring='roc_auc').fit(X, y) search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y) # Check warning only occurs in situation where behavior changed: # estimator requires score method to compete with scoring parameter score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y) score_accuracy = assert_warns(ChangedBehaviorWarning, search_accuracy.score, X, y) score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score, X, y) score_auc = assert_warns(ChangedBehaviorWarning, search_auc.score, X, y) # ensure the test is sane assert_true(score_auc < 1.0) assert_true(score_accuracy < 1.0) assert_not_equal(score_auc, score_accuracy) assert_almost_equal(score_accuracy, score_no_scoring) assert_almost_equal(score_auc, score_no_score_auc)
def test_convergence_fail(): d = load_linnerud() X = d.data Y = d.target pls_bynipals = pls_.PLSCanonical(n_components=X.shape[1], max_iter=2, tol=1e-10) assert_warns(ConvergenceWarning, pls_bynipals.fit, X, Y)
def test_identical_regressors(): newX = X.copy() newX[:, 1] = newX[:, 0] gamma = np.zeros(n_features) gamma[0] = gamma[1] = 1. newy = np.dot(newX, gamma) assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
def test_prf_average_compat(): # Ensure warning if f1_score et al.'s average is implicit for multiclass y_true = [1, 2, 3, 3] y_pred = [1, 2, 3, 1] y_true_bin = [0, 1, 1] y_pred_bin = [0, 1, 0] for metric in [precision_score, recall_score, f1_score, partial(fbeta_score, beta=2)]: score = assert_warns(DeprecationWarning, metric, y_true, y_pred) score_weighted = assert_no_warnings(metric, y_true, y_pred, average='weighted') assert_equal(score, score_weighted, 'average does not act like "weighted" by default') # check binary passes without warning assert_no_warnings(metric, y_true_bin, y_pred_bin) # but binary with pos_label=None should behave like multiclass score = assert_warns(DeprecationWarning, metric, y_true_bin, y_pred_bin, pos_label=None) score_weighted = assert_no_warnings(metric, y_true_bin, y_pred_bin, pos_label=None, average='weighted') assert_equal(score, score_weighted, 'average does not act like "weighted" by default with ' 'binary data and pos_label=None')
def test_sparse_randomized_pca_inverse(): """Test that RandomizedPCA is inversible on sparse data""" rng = np.random.RandomState(0) n, p = 50, 3 X = rng.randn(n, p) # spherical data X[:, 1] *= .00001 # make middle component relatively small # no large means because the sparse version of randomized pca does not do # centering to avoid breaking the sparsity X = csr_matrix(X) # same check that we can find the original data from the transformed signal # (since the data is almost of rank n_components) pca = RandomizedPCA(n_components=2, random_state=0) assert_warns(DeprecationWarning, pca.fit, X) Y = pca.transform(X) Y_inverse = pca.inverse_transform(Y) assert_almost_equal(X.todense(), Y_inverse, decimal=2) # same as above with whitening (approximate reconstruction) pca = assert_warns(DeprecationWarning, RandomizedPCA(n_components=2, whiten=True, random_state=0).fit, X) Y = pca.transform(X) Y_inverse = pca.inverse_transform(Y) relative_max_delta = (np.abs(X.todense() - Y_inverse) / np.abs(X).mean()).max() # XXX: this does not seam to work as expected: assert_almost_equal(relative_max_delta, 0.91, decimal=2)
def test_warning_n_components_greater_than_n_features(): n_features = 20 data, _ = make_sparse_random_data(5, n_features, int(n_features / 4)) for RandomProjection in all_RandomProjection: assert_warns(DataDimensionalityWarning, RandomProjection(n_components=n_features + 1).fit, data)
def test_label_binarizer_multilabel(): lb = LabelBinarizer() # test input as lists of tuples inp = [(2, 3), (1,), (1, 2)] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) got = assert_warns(DeprecationWarning, lb.fit_transform, inp) assert_true(lb.multilabel_) assert_array_equal(indicator_mat, got) assert_equal(lb.inverse_transform(got), inp) # test input as label indicator matrix lb.fit(indicator_mat) assert_array_equal(indicator_mat, lb.inverse_transform(indicator_mat)) # regression test for the two-class multilabel case lb = LabelBinarizer() inp = [[1, 0], [0], [1], [0, 1]] expected = np.array([[1, 1], [1, 0], [0, 1], [1, 1]]) got = assert_warns(DeprecationWarning, lb.fit_transform, inp) assert_true(lb.multilabel_) assert_array_equal(expected, got) assert_equal([set(x) for x in lb.inverse_transform(got)], [set(x) for x in inp])
def test_unique_labels(): # Empty iterable assert_raises(ValueError, unique_labels) # Multiclass problem assert_array_equal(unique_labels(xrange(10)), np.arange(10)) assert_array_equal(unique_labels(np.arange(10)), np.arange(10)) assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4])) # Multilabels assert_array_equal( assert_warns(DeprecationWarning, unique_labels, [(0, 1, 2), (0,), tuple(), (2, 1)]), np.arange(3) ) assert_array_equal(assert_warns(DeprecationWarning, unique_labels, [[0, 1, 2], [0], list(), [2, 1]]), np.arange(3)) assert_array_equal(unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3)) assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3)) # Several arrays passed assert_array_equal(unique_labels([4, 0, 2], xrange(5)), np.arange(5)) assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3)) # Border line case with binary indicator matrix assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5))) assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5))) assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5)) # Some tests with strings input assert_array_equal(unique_labels(["a", "b", "c"], ["d"]), ["a", "b", "c", "d"]) assert_array_equal( assert_warns(DeprecationWarning, unique_labels, [["a", "b"], ["c"]], [["d"]]), ["a", "b", "c", "d"] )
def test_compute_class_weight_auto_negative(): # Test compute_class_weight when labels are negative # Test with balanced class labels. classes = np.array([-2, -1, 0]) y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) class_counts = np.bincount(y + 2) assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2. / 3, 2., 1.])
def test_oob_score_classification(): # Check that oob prediction is a good estimation of the generalization # error. X, y = make_imbalance(iris.data, iris.target, ratio={0: 20, 1: 25, 2: 50}, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for base_estimator in [DecisionTreeClassifier(), SVC()]: clf = BalancedBaggingClassifier( base_estimator=base_estimator, n_estimators=100, bootstrap=True, oob_score=True, random_state=0).fit(X_train, y_train) test_score = clf.score(X_test, y_test) assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns(UserWarning, BalancedBaggingClassifier( base_estimator=base_estimator, n_estimators=1, bootstrap=True, oob_score=True, random_state=0).fit, X_train, y_train)
def test_timeout(): sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True, random_state=0, max_iter=1) with warnings.catch_warnings(record=True): warnings.simplefilter("always") assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)
def test_label_binarizer(): lb = LabelBinarizer() # one-class case defaults to negative label inp = ["pos", "pos", "pos", "pos"] expected = np.array([[0, 0, 0, 0]]).T got = lb.fit_transform(inp) assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_")) assert_array_equal(lb.classes_, ["pos"]) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp) # two-class case inp = ["neg", "pos", "pos", "neg"] expected = np.array([[0, 1, 1, 0]]).T got = lb.fit_transform(inp) assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_")) assert_array_equal(lb.classes_, ["neg", "pos"]) assert_array_equal(expected, got) to_invert = np.array([[1, 0], [0, 1], [0, 1], [1, 0]]) assert_array_equal(lb.inverse_transform(to_invert), inp) # multi-class case inp = ["spam", "ham", "eggs", "ham", "0"] expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]) got = lb.fit_transform(inp) assert_array_equal(lb.classes_, ["0", "eggs", "ham", "spam"]) assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_")) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp)
def test_lars_drop_for_good(): # Create an ill-conditioned situation in which the LARS has to go # far in the path to converge, and check that LARS and coordinate # descent give the same answers X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]] y = [10, 10, 1] alpha = .0001 def objective_function(coef): return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2 + alpha * linalg.norm(coef, 1)) lars = linear_model.LassoLars(alpha=alpha, normalize=False) assert_warns(ConvergenceWarning, lars.fit, X, y) lars_coef_ = lars.coef_ lars_obj = objective_function(lars_coef_) coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-10, normalize=False) with ignore_warnings(): cd_coef_ = coord_descent.fit(X, y).coef_ cd_obj = objective_function(cd_coef_) assert_less(lars_obj, cd_obj * (1. + 1e-8))
def test_ward_agglomeration(): """ Check that we obtain the correct solution in a simplistic case """ rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) assert_warns(DeprecationWarning, WardAgglomeration) with ignore_warnings(): ward = WardAgglomeration(n_clusters=5, connectivity=connectivity) ward.fit(X) agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity) agglo.fit(X) assert_array_equal(agglo.labels_, ward.labels_) assert_true(np.size(np.unique(agglo.labels_)) == 5) X_red = agglo.transform(X) assert_true(X_red.shape[1] == 5) X_full = agglo.inverse_transform(X_red) assert_true(np.unique(X_full[0]).size == 5) assert_array_almost_equal(agglo.transform(X_full), X_red) # Check that fitting with no samples raises a ValueError assert_raises(ValueError, agglo.fit, X[:0])
def test_ridge_regression_convergence_fail(): rng = np.random.RandomState(0) y = rng.randn(5) X = rng.randn(5, 10) assert_warns(ConvergenceWarning, ridge_regression, X, y, alpha=1.0, solver="sparse_cg", tol=0., max_iter=None, verbose=1)
def test_roc_curve_toydata(): # Binary classification y_true = [0, 1] y_score = [0, 1] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 0, 1]) assert_array_almost_equal(fpr, [0, 1, 1]) assert_almost_equal(roc_auc, 1.) y_true = [0, 1] y_score = [1, 0] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1, 1]) assert_array_almost_equal(fpr, [0, 0, 1]) assert_almost_equal(roc_auc, 0.) y_true = [1, 0] y_score = [1, 1] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [0, 1]) assert_almost_equal(roc_auc, 0.5) y_true = [1, 0] y_score = [1, 0] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 0, 1]) assert_array_almost_equal(fpr, [0, 1, 1]) assert_almost_equal(roc_auc, 1.) y_true = [1, 0] y_score = [0.5, 0.5] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [0, 1]) assert_almost_equal(roc_auc, .5) y_true = [0, 0] y_score = [0.25, 0.75] # assert UndefinedMetricWarning because of no positive sample in y_true tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true, y_score) assert_raises(ValueError, roc_auc_score, y_true, y_score) assert_array_almost_equal(tpr, [0., 0.5, 1.]) assert_array_almost_equal(fpr, [np.nan, np.nan, np.nan]) y_true = [1, 1] y_score = [0.25, 0.75] # assert UndefinedMetricWarning because of no negative sample in y_true tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true, y_score) assert_raises(ValueError, roc_auc_score, y_true, y_score) assert_array_almost_equal(tpr, [np.nan, np.nan, np.nan]) assert_array_almost_equal(fpr, [0., 0.5, 1.]) # Multi-label classification task y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [0, 1]]) assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro") assert_raises(ValueError, roc_auc_score, y_true, y_score, average="weighted") assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 1.) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 1.) y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro") assert_raises(ValueError, roc_auc_score, y_true, y_score, average="weighted") assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0.5) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0.5) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0.5, 0.5], [0.5, 0.5]]) assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), .5)
def test_minibatch_k_means_init_multiple_runs_with_explicit_centers(): mb_k_means = MiniBatchKMeans(init=centers.copy(), n_clusters=n_clusters, random_state=42, n_init=10) assert_warns(RuntimeWarning, mb_k_means.fit, X)
def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" assert_raises_regex(TypeError, msg, check_estimator, object) assert_raises_regex(TypeError, msg, check_estimator, object()) # check that values returned by get_params match set_params msg = "get_params result does not match what was passed to set_params" assert_raises_regex(AssertionError, msg, check_estimator, ModifiesValueInsteadOfRaisingError()) assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams()) assert_raises_regex(AssertionError, msg, check_estimator, ModifiesAnotherValue()) # check that we have a fit method msg = "object has no attribute 'fit'" assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator()) # check that fit does input validation msg = "TypeError not raised" assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier) assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier()) # check that sample_weights in fit accepts pandas.Series type try: from pandas import Series # noqa msg = ("Estimator NoSampleWeightPandasSeriesType raises error if " "'sample_weight' parameter is of type pandas.Series") assert_raises_regex(ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType) except ImportError: pass # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict()) # check that estimator state does not change # at transform/predict/predict_proba time msg = 'Estimator changes __dict__ during predict' assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict) # check that `fit` only changes attribures that # are private (start with an _ or end with a _). msg = ('Estimator ChangesWrongAttribute should not change or mutate ' 'the parameter wrong_attribute from 0 to 1 during fit.') assert_raises_regex(AssertionError, msg, check_estimator, ChangesWrongAttribute) check_estimator(ChangesUnderscoreAttribute) # check that `fit` doesn't add any public attribute msg = (r'Estimator adds public attribute\(s\) during the fit method.' ' Estimators are only allowed to add private attributes' ' either started with _ or ended' ' with _ but wrong_attribute added') assert_raises_regex(AssertionError, msg, check_estimator, SetsWrongAttribute) # check for invariant method name = NotInvariantPredict.__name__ method = 'predict' msg = ("{method} of {name} is not invariant when applied " "to a subset.").format(method=method, name=name) assert_raises_regex(AssertionError, msg, check_estimator, NotInvariantPredict) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name # the check for sparse input handling prints to the stdout, # instead of raising an error, so as not to remove the original traceback. # that means we need to jump through some hoops to catch it. old_stdout = sys.stdout string_buffer = StringIO() sys.stdout = string_buffer try: check_estimator(NoSparseClassifier) except: pass finally: sys.stdout = old_stdout assert_true(msg in string_buffer.getvalue()) # Large indices test on bad estimator msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to ' r'support \S{3}_64 matrix, and is not failing gracefully.*') # only supported by scipy version more than 0.14.0 if LARGE_SPARSE_SUPPORTED: assert_raises_regex(AssertionError, msg, check_estimator, LargeSparseNotSupportedClassifier) # non-regression test for estimators transforming to sparse data check_estimator(SparseTransformer()) # doesn't error on actual estimator check_estimator(AdaBoostClassifier) check_estimator(AdaBoostClassifier()) check_estimator(MultiTaskElasticNet) check_estimator(MultiTaskElasticNet())
def test_minibatch_init_with_large_k(): mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20) # Check that a warning is raised, as the number clusters is larger # than the init_size assert_warns(RuntimeWarning, mb_k_means.fit, X)
def test_data_format(): X = [1.3, 1.1, 0.9, 1.4, 1.5, 3.2] clf = loop.LocalOutlierProbability(X, n_neighbors=3) assert_warns(UserWarning, clf.fit)
def test_extent(): X = np.array([[1, 1], [1, 0]]) clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4) assert_warns(UserWarning, clf.fit)
def test_lasso_alpha_warning(): X = [[-1], [0], [1]] Y = [-1, 0, 1] # just a straight line clf = Lasso(alpha=0) assert_warns(UserWarning, clf.fit, X, Y)
def test_f_classif_constant_feature(): # Test that f_classif warns if a feature is constant throughout. X, y = make_classification(n_samples=10, n_features=5) X[:, 0] = 2.0 assert_warns(UserWarning, f_classif, X, y)
def test_check_array_dtype_warning(): X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] X_float64 = np.asarray(X_int_list, dtype=np.float64) X_float32 = np.asarray(X_int_list, dtype=np.float32) X_int64 = np.asarray(X_int_list, dtype=np.int64) X_csr_float64 = sp.csr_matrix(X_float64) X_csr_float32 = sp.csr_matrix(X_float32) X_csc_float32 = sp.csc_matrix(X_float32) X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32) y = [0, 0, 1] integer_data = [X_int64, X_csc_int32] float64_data = [X_float64, X_csr_float64] float32_data = [X_float32, X_csr_float32, X_csc_float32] for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) assert_equal(X_checked.dtype, np.float64) X_checked = assert_warns(DataConversionWarning, check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert_equal(X_checked.dtype, np.float64) # Check that the warning message includes the name of the Estimator X_checked = assert_warns_message(DataConversionWarning, 'SomeEstimator', check_array, X, dtype=[np.float64, np.float32], accept_sparse=True, warn_on_dtype=True, estimator='SomeEstimator') assert_equal(X_checked.dtype, np.float64) X_checked, y_checked = assert_warns_message( DataConversionWarning, 'KNeighborsClassifier', check_X_y, X, y, dtype=np.float64, accept_sparse=True, warn_on_dtype=True, estimator=KNeighborsClassifier()) assert_equal(X_checked.dtype, np.float64) for X in float64_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert_equal(X_checked.dtype, np.float64) X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=False) assert_equal(X_checked.dtype, np.float64) for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=True) assert_equal(X_checked.dtype, np.float32) assert X_checked is X X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=True) assert_equal(X_checked.dtype, np.float32) assert X_checked is not X X_checked = assert_no_warnings(check_array, X_csc_float32, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=False) assert_equal(X_checked.dtype, np.float32) assert X_checked is not X_csc_float32 assert_equal(X_checked.format, 'csr')
def test_alpha(): # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) nb = BernoulliNB(alpha=0.) assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1]) assert_warns(UserWarning, nb.fit, X, y) prob = np.array([[1, 0], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) nb = MultinomialNB(alpha=0.) assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1]) assert_warns(UserWarning, nb.fit, X, y) prob = np.array([[2. / 3, 1. / 3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) # Test sparse X X = scipy.sparse.csr_matrix(X) nb = BernoulliNB(alpha=0.) assert_warns(UserWarning, nb.fit, X, y) prob = np.array([[1, 0], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) nb = MultinomialNB(alpha=0.) assert_warns(UserWarning, nb.fit, X, y) prob = np.array([[2. / 3, 1. / 3], [0, 1]]) assert_array_almost_equal(nb.predict_proba(X), prob) # Test for alpha < 0 X = np.array([[1, 0], [1, 1]]) y = np.array([0, 1]) expected_msg = ('Smoothing parameter alpha = -1.0e-01. ' 'alpha should be > 0.') b_nb = BernoulliNB(alpha=-0.1) m_nb = MultinomialNB(alpha=-0.1) assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y) assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y) b_nb = BernoulliNB(alpha=-0.1) m_nb = MultinomialNB(alpha=-0.1) assert_raise_message(ValueError, expected_msg, b_nb.partial_fit, X, y, classes=[0, 1]) assert_raise_message(ValueError, expected_msg, m_nb.partial_fit, X, y, classes=[0, 1])
def test_ledoit_wolf(): # Tests LedoitWolf module on a simple dataset. # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) lw = LedoitWolf(assume_centered=True) lw.fit(X_centered) shrinkage_ = lw.shrinkage_ score_ = lw.score(X_centered) assert_almost_equal( ledoit_wolf_shrinkage(X_centered, assume_centered=True), shrinkage_) assert_almost_equal( ledoit_wolf_shrinkage(X_centered, assume_centered=True, block_size=6), shrinkage_) # compare shrunk covariance obtained from data and from MLE estimate lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered, assume_centered=True) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) # compare estimates given by LW and ShrunkCovariance scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True) scov.fit(X_centered) assert_array_almost_equal(scov.covariance_, lw.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) lw = LedoitWolf(assume_centered=True) lw.fit(X_1d) lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d, assume_centered=True) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) assert_array_almost_equal((X_1d**2).sum() / n_samples, lw.covariance_, 4) # test shrinkage coeff on a simple data set (without saving precision) lw = LedoitWolf(store_precision=False, assume_centered=True) lw.fit(X_centered) assert_almost_equal(lw.score(X_centered), score_, 4) assert (lw.precision_ is None) # Same tests without assuming centered data # test shrinkage coeff on a simple data set lw = LedoitWolf() lw.fit(X) assert_almost_equal(lw.shrinkage_, shrinkage_, 4) assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X)) assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1]) assert_almost_equal(lw.score(X), score_, 4) # compare shrunk covariance obtained from data and from MLE estimate lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) # compare estimates given by LW and ShrunkCovariance scov = ShrunkCovariance(shrinkage=lw.shrinkage_) scov.fit(X) assert_array_almost_equal(scov.covariance_, lw.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) lw = LedoitWolf() lw.fit(X_1d) lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4) # test with one sample # FIXME I don't know what this test does X_1sample = np.arange(5) lw = LedoitWolf() assert_warns(UserWarning, lw.fit, X_1sample) assert_array_almost_equal(lw.covariance_, np.zeros(shape=(5, 5), dtype=np.float64)) # test shrinkage coeff on a simple data set (without saving precision) lw = LedoitWolf(store_precision=False) lw.fit(X) assert_almost_equal(lw.score(X), score_, 4) assert (lw.precision_ is None)
def test_oas(): # Tests OAS module on a simple dataset. # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) oa = OAS(assume_centered=True) oa.fit(X_centered) shrinkage_ = oa.shrinkage_ score_ = oa.score(X_centered) # compare shrunk covariance obtained from data and from MLE estimate oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered, assume_centered=True) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) # compare estimates given by OAS and ShrunkCovariance scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True) scov.fit(X_centered) assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) oa = OAS(assume_centered=True) oa.fit(X_1d) oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4) # test shrinkage coeff on a simple data set (without saving precision) oa = OAS(store_precision=False, assume_centered=True) oa.fit(X_centered) assert_almost_equal(oa.score(X_centered), score_, 4) assert (oa.precision_ is None) # Same tests without assuming centered data-------------------------------- # test shrinkage coeff on a simple data set oa = OAS() oa.fit(X) assert_almost_equal(oa.shrinkage_, shrinkage_, 4) assert_almost_equal(oa.score(X), score_, 4) # compare shrunk covariance obtained from data and from MLE estimate oa_cov_from_mle, oa_shinkrage_from_mle = oas(X) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) # compare estimates given by OAS and ShrunkCovariance scov = ShrunkCovariance(shrinkage=oa.shrinkage_) scov.fit(X) assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) oa = OAS() oa.fit(X_1d) oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4) # test with one sample # FIXME I don't know what this test does X_1sample = np.arange(5) oa = OAS() assert_warns(UserWarning, oa.fit, X_1sample) assert_array_almost_equal(oa.covariance_, np.zeros(shape=(5, 5), dtype=np.float64)) # test shrinkage coeff on a simple data set (without saving precision) oa = OAS(store_precision=False) oa.fit(X) assert_almost_equal(oa.score(X), score_, 4) assert (oa.precision_ is None)
def test_ignore_warning(): # This check that ignore_warning decorateur and context manager are working # as expected def _warning_function(): warnings.warn("deprecation warning", DeprecationWarning) def _multiple_warning_function(): warnings.warn("deprecation warning", DeprecationWarning) warnings.warn("deprecation warning") # Check the function directly assert_no_warnings(ignore_warnings(_warning_function)) assert_no_warnings( ignore_warnings(_warning_function, category=DeprecationWarning)) assert_warns(DeprecationWarning, ignore_warnings(_warning_function, category=UserWarning)) assert_warns( UserWarning, ignore_warnings(_multiple_warning_function, category=DeprecationWarning)) assert_warns( DeprecationWarning, ignore_warnings(_multiple_warning_function, category=UserWarning)) assert_no_warnings( ignore_warnings(_warning_function, category=(DeprecationWarning, UserWarning))) # Check the decorator @ignore_warnings def decorator_no_warning(): _warning_function() _multiple_warning_function() @ignore_warnings(category=(DeprecationWarning, UserWarning)) def decorator_no_warning_multiple(): _multiple_warning_function() @ignore_warnings(category=DeprecationWarning) def decorator_no_deprecation_warning(): _warning_function() @ignore_warnings(category=UserWarning) def decorator_no_user_warning(): _warning_function() @ignore_warnings(category=DeprecationWarning) def decorator_no_deprecation_multiple_warning(): _multiple_warning_function() @ignore_warnings(category=UserWarning) def decorator_no_user_multiple_warning(): _multiple_warning_function() assert_no_warnings(decorator_no_warning) assert_no_warnings(decorator_no_warning_multiple) assert_no_warnings(decorator_no_deprecation_warning) assert_warns(DeprecationWarning, decorator_no_user_warning) assert_warns(UserWarning, decorator_no_deprecation_multiple_warning) assert_warns(DeprecationWarning, decorator_no_user_multiple_warning) # Check the context manager def context_manager_no_warning(): with ignore_warnings(): _warning_function() def context_manager_no_warning_multiple(): with ignore_warnings(category=(DeprecationWarning, UserWarning)): _multiple_warning_function() def context_manager_no_deprecation_warning(): with ignore_warnings(category=DeprecationWarning): _warning_function() def context_manager_no_user_warning(): with ignore_warnings(category=UserWarning): _warning_function() def context_manager_no_deprecation_multiple_warning(): with ignore_warnings(category=DeprecationWarning): _multiple_warning_function() def context_manager_no_user_multiple_warning(): with ignore_warnings(category=UserWarning): _multiple_warning_function() assert_no_warnings(context_manager_no_warning) assert_no_warnings(context_manager_no_warning_multiple) assert_no_warnings(context_manager_no_deprecation_warning) assert_warns(DeprecationWarning, context_manager_no_user_warning) assert_warns(UserWarning, context_manager_no_deprecation_multiple_warning) assert_warns(DeprecationWarning, context_manager_no_user_multiple_warning)
def test_acmes_clip_samples(): opt = ACMESOptimizer(n_train_max=20001) assert_warns(UserWarning, opt.init, 1) assert_equal(opt.n_train_max, 20000)
tree_builder, X.T, np.ones((4, 4))) def test_unstructured_linkage_tree(): """ Check that we obtain the correct solution for unstructured linkage trees. """ rnd = np.random.RandomState(0) X = rnd.randn(50, 100) <<<<<<< HEAD ======= for this_X in (X, X[0]): # With specified a number of clusters just for the sake of # raising a warning and testing the warning code children, n_nodes, n_leaves, parent = assert_warns(UserWarning, ward_tree, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 assert_equal(len(children) + n_leaves, n_nodes) >>>>>>> remote for tree_builder in _TREE_BUILDERS.values(): for this_X in (X, X[0]): with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", UserWarning) warnings.simplefilter("ignore", DeprecationWarning) # With specified a number of clusters just for the sake of # raising a warning and testing the warning code children, n_nodes, n_leaves, parent = tree_builder( this_X.T, n_clusters=10)
def test_label_binarizer_errors(): """Check that invalid arguments yield ValueError""" one_class = np.array([0, 0, 0, 0]) lb = LabelBinarizer().fit(one_class) assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_")) multi_label = [(2, 3), (0, ), (0, 2)] assert_raises(ValueError, lb.transform, multi_label) lb = LabelBinarizer() assert_raises(ValueError, lb.transform, []) assert_raises(ValueError, lb.inverse_transform, []) y = np.array([[0, 1, 0], [1, 1, 1]]) classes = np.arange(3) assert_raises(ValueError, label_binarize, y, classes, multilabel=True, neg_label=2, pos_label=1) assert_raises(ValueError, label_binarize, y, classes, multilabel=True, neg_label=2, pos_label=2) assert_raises(ValueError, LabelBinarizer, neg_label=2, pos_label=1) assert_raises(ValueError, LabelBinarizer, neg_label=2, pos_label=2) assert_raises(ValueError, LabelBinarizer, neg_label=1, pos_label=2, sparse_output=True) # Fail on y_type assert_raises(ValueError, _inverse_binarize_thresholding, y=csr_matrix([[1, 2], [2, 1]]), output_type="foo", classes=[1, 2], threshold=0) # Fail on the number of classes assert_raises(ValueError, _inverse_binarize_thresholding, y=csr_matrix([[1, 2], [2, 1]]), output_type="foo", classes=[1, 2, 3], threshold=0) # Fail on the dimension of 'binary' assert_raises(ValueError, _inverse_binarize_thresholding, y=np.array([[1, 2, 3], [2, 1, 3]]), output_type="binary", classes=[1, 2, 3], threshold=0) # Fail on multioutput data assert_raises(ValueError, LabelBinarizer().fit, np.array([[1, 3], [2, 1]])) assert_raises(ValueError, label_binarize, np.array([[1, 3], [2, 1]]), [1, 2, 3])
def test_extent() -> None: X = np.array([[1, 1], [1, 0]]) clf = loop.LocalOutlierProbability(X, n_neighbors=2, extent=4, use_numba=NUMBA) assert_warns(UserWarning, clf.fit)
def test_metric_params_interface(): assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier, metric_params={'p': 3})
def test_check_array_dtype_warning(): X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] X_float64 = np.asarray(X_int_list, dtype=np.float64) X_float32 = np.asarray(X_int_list, dtype=np.float32) X_int64 = np.asarray(X_int_list, dtype=np.int64) X_csr_float64 = sp.csr_matrix(X_float64) X_csr_float32 = sp.csr_matrix(X_float32) X_csc_float32 = sp.csc_matrix(X_float32) X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32) y = [0, 0, 1] integer_data = [X_int64, X_csc_int32] float64_data = [X_float64, X_csr_float64] float32_data = [X_float32, X_csr_float32, X_csc_float32] for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) assert X_checked.dtype == np.float64 X_checked = assert_warns(DataConversionWarning, check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert X_checked.dtype == np.float64 # Check that the warning message includes the name of the Estimator X_checked = assert_warns_message(DataConversionWarning, 'SomeEstimator', check_array, X, dtype=[np.float64, np.float32], accept_sparse=True, warn_on_dtype=True, estimator='SomeEstimator') assert X_checked.dtype == np.float64 X_checked, y_checked = assert_warns_message( DataConversionWarning, 'KNeighborsClassifier', check_X_y, X, y, dtype=np.float64, accept_sparse=True, warn_on_dtype=True, estimator=KNeighborsClassifier()) assert X_checked.dtype == np.float64 for X in float64_data: with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert X_checked.dtype == np.float64 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=False) assert X_checked.dtype == np.float64 assert len(record) == 0 for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=True) assert X_checked.dtype == np.float32 assert X_checked is X X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=True) assert X_checked.dtype == np.float32 assert X_checked is not X X_checked = assert_no_warnings(check_array, X_csc_float32, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=False) assert X_checked.dtype == np.float32 assert X_checked is not X_csc_float32 assert X_checked.format == 'csr'
def test_timeout(): sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True, random_state=0, max_iter=1) assert_warns(ConvergenceWarning, sp.fit, X_sp, Y)
def test_metric_params_interface(): assert_warns(DeprecationWarning, neighbors.KNeighborsClassifier, metric='wminkowski', w=np.ones(10)) assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier, metric_params={'p': 3})
def test_nearmiss_deprecation(): nm = NearMiss(ver3_samp_ngh=3, version=3) assert_warns(DeprecationWarning, nm.fit_sample, X, Y)
def check_target_type(name, Estimator): X = np.random.random((20, 2)) y = np.linspace(0, 1, 20) estimator = Estimator() set_random_state(estimator) assert_warns(UserWarning, estimator.fit, X, y)
def check_multiclass_warning(name, Estimator): X = np.random.random((20, 2)) y = np.array([0] * 3 + [1] * 2 + [2] * 15) estimator = Estimator() set_random_state(estimator) assert_warns(UserWarning, estimator.fit, X, y)
def test_oob_score(): """Test if oob_score is deprecated. """ clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) assert_warns(DeprecationWarning, hasattr, clf, 'oob_score_')
def test_multilabel_representation_invariance(): # Generate some data n_classes = 4 n_samples = 50 # using sequence of sequences is deprecated, but still tested make_ml = ignore_warnings(make_multilabel_classification) _, y1 = make_ml(n_features=1, n_classes=n_classes, random_state=0, n_samples=n_samples) _, y2 = make_ml(n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples) # Be sure to have at least one empty label y1 += ([], ) y2 += ([], ) # NOTE: The "sorted" trick is necessary to shuffle labels, because it # allows to return the shuffled tuple. rng = check_random_state(42) shuffled = lambda x: sorted(x, key=lambda *args: rng.rand()) y1_shuffle = [shuffled(x) for x in y1] y2_shuffle = [shuffled(x) for x in y2] # Let's have redundant labels y1_redundant = [x * rng.randint(1, 4) for x in y1] y2_redundant = [x * rng.randint(1, 4) for x in y2] # Binary indicator matrix format lb = MultiLabelBinarizer().fit([range(n_classes)]) y1_binary_indicator = lb.transform(y1) y2_binary_indicator = lb.transform(y2) y1_shuffle_binary_indicator = lb.transform(y1_shuffle) y2_shuffle_binary_indicator = lb.transform(y2_shuffle) for name in MULTILABELS_METRICS: metric = ALL_METRICS[name] # XXX cruel hack to work with partial functions if isinstance(metric, partial): metric.__module__ = 'tmp' metric.__name__ = name # Check warning for sequence of sequences measure = assert_warns(DeprecationWarning, metric, y1, y2) metric = ignore_warnings(metric) # Check representation invariance assert_almost_equal(metric(y1_binary_indicator, y2_binary_indicator), measure, err_msg="%s failed representation invariance " "between list of list of labels " "format and dense binary indicator " "format." % name) with ignore_warnings(): # sequence of sequences is deprecated # Check invariance with redundant labels with list of labels assert_almost_equal( metric(y1, y2_redundant), measure, err_msg="%s failed rendundant label invariance" % name) assert_almost_equal( metric(y1_redundant, y2_redundant), measure, err_msg="%s failed rendundant label invariance" % name) assert_almost_equal( metric(y1_redundant, y2), measure, err_msg="%s failed rendundant label invariance" % name) # Check shuffling invariance with list of labels assert_almost_equal(metric(y1_shuffle, y2_shuffle), measure, err_msg="%s failed shuffling invariance " "with list of list of labels format." % name) # Check shuffling invariance with dense binary indicator matrix assert_almost_equal(metric(y1_shuffle_binary_indicator, y2_shuffle_binary_indicator), measure, err_msg="%s failed shuffling invariance " " with dense binary indicator format." % name) with ignore_warnings(): # sequence of sequences is deprecated # Check raises error with mix input representation assert_raises(ValueError, metric, y1, y2_binary_indicator) assert_raises(ValueError, metric, y1_binary_indicator, y2)
def test_timeout(): a = svm.SVC(kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0, max_iter=1) assert_warns(ConvergenceWarning, a.fit, X, Y)
def test_factor_analysis(): """Test FactorAnalysis ability to recover the data covariance structure """ rng = np.random.RandomState(0) n_samples, n_features, n_components = 20, 5, 3 # Some random settings for the generative model W = rng.randn(n_components, n_features) # latent variable of dim 3, 20 of it h = rng.randn(n_samples, n_components) # using gamma to model different noise variance # per component noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features) # generate observations # wlog, mean is 0 X = np.dot(h, W) + noise assert_raises(ValueError, FactorAnalysis, svd_method='foo') fa_fail = FactorAnalysis() fa_fail.svd_method = 'foo' assert_raises(ValueError, fa_fail.fit, X) fas = [] for method in ['randomized', 'lapack']: fa = FactorAnalysis(n_components=n_components, svd_method=method) fa.fit(X) fas.append(fa) X_t = fa.transform(X) assert_equal(X_t.shape, (n_samples, n_components)) assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum()) assert_almost_equal(fa.score_samples(X).mean(), fa.score(X)) diff = np.all(np.diff(fa.loglike_)) assert_greater(diff, 0., 'Log likelihood dif not increase') # Sample Covariance scov = np.cov(X, rowvar=0., bias=1.) # Model Covariance mcov = fa.get_covariance() diff = np.sum(np.abs(scov - mcov)) / W.size assert_less(diff, 0.1, "Mean absolute difference is %f" % diff) fa = FactorAnalysis(n_components=n_components, noise_variance_init=np.ones(n_features)) assert_raises(ValueError, fa.fit, X[:, :2]) f = lambda x, y: np.abs(getattr(x, y)) # sign will not be equal fa1, fa2 = fas for attr in ['loglike_', 'components_', 'noise_variance_']: assert_almost_equal(f(fa1, attr), f(fa2, attr)) fa1.max_iter = 1 fa1.verbose = True assert_warns(ConvergenceWarning, fa1.fit, X) # Test get_covariance and get_precision with n_components == n_features # with n_components < n_features and with n_components == 0 for n_components in [0, 2, X.shape[1]]: fa.n_components = n_components fa.fit(X) cov = fa.get_covariance() precision = fa.get_precision() assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
def test_linear_svc_convergence_warnings(): # Test that warnings are raised if model does not converge lsvc = svm.LinearSVC(max_iter=2, verbose=1) assert_warns(ConvergenceWarning, lsvc.fit, X, Y) assert_equal(lsvc.n_iter_, 2)
def test_lda_n_topics_deprecation(): n_components, X = _build_sparse_mtx() lda = LatentDirichletAllocation(n_topics=10, learning_method='batch') assert_warns(DeprecationWarning, lda.fit, X)