def test_deprecated_grid_search_iid():
    depr_message = ("The default of the `iid` parameter will change from True "
                    "to False in version 0.22")
    X, y = make_blobs(n_samples=54, random_state=0, centers=2)
    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]}, cv=3)
    # no warning with equally sized test sets
    assert_no_warnings(grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]}, cv=5)
    # warning because 54 % 5 != 0
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]}, cv=2)
    # warning because stratification into two classes and 27 % 2 != 0
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]}, cv=KFold(2))
    # no warning because no stratification and 54 % 2 == 0
    assert_no_warnings(grid.fit, X, y)

def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # ChangedBehaviorWarning occurred previously (prior to #9005)
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_no_warnings(search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)
    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)

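# `LinearSVCNoScore`, used here and in the second variant of this test
# below, is a helper from the surrounding test suite and is not defined in
# this excerpt. A minimal sketch, assuming its only job is to be a LinearSVC
# whose `score` attribute raises AttributeError so that GridSearchCV.score
# must fall back on the `scoring` parameter:
from sklearn.svm import LinearSVC


class LinearSVCNoScore(LinearSVC):
    """A LinearSVC classifier that has no score method."""
    @property
    def score(self):
        raise AttributeError
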
def test_one_hot_encoder_deprecationwarnings():
    for X in [[[3, 2, 1], [0, 1, 1]],
              [[3., 2., 1.], [0., 1., 1.]]]:
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer",
                             enc.fit, X)
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer",
                             enc.fit_transform, X)

        # check it still works correctly as well
        with ignore_warnings(category=FutureWarning):
            X_trans = enc.fit_transform(X).toarray()
        res = [[0., 1., 0., 1., 1.], [1., 0., 1., 0., 1.]]
        assert_array_equal(X_trans, res)

        # check deprecated attributes
        assert_warns(DeprecationWarning, lambda: enc.active_features_)
        assert_warns(DeprecationWarning, lambda: enc.feature_indices_)
        assert_warns(DeprecationWarning, lambda: enc.n_values_)

        # check no warning is raised if keyword is specified
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit, X)
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit_transform, X)
        X_trans = enc.fit_transform(X).toarray()
        assert_array_equal(X_trans, res)

        # check there is also a warning if the default is passed
        enc = OneHotEncoder(n_values='auto', handle_unknown='ignore')
        assert_warns(DeprecationWarning, enc.fit, X)

    X = np.array([['cat1', 'cat2']], dtype=object).T
    enc = OneHotEncoder(categorical_features='all')
    assert_warns(DeprecationWarning, enc.fit, X)

def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64,
                         warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True)

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df_mixed_numeric.astype(int),
                       dtype='numeric', warn_on_dtype=True)

def test_feature_agglomeration():
    n_clusters = 1
    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)

    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                      pooling_func=np.mean)
    agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
                                        pooling_func=np.median)
    assert_no_warnings(agglo_mean.fit, X)
    assert_no_warnings(agglo_median.fit, X)
    assert_true(np.size(np.unique(agglo_mean.labels_)) == n_clusters)
    assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
    assert_true(np.size(agglo_mean.labels_) == X.shape[1])
    assert_true(np.size(agglo_median.labels_) == X.shape[1])

    # Test transform
    Xt_mean = agglo_mean.transform(X)
    Xt_median = agglo_median.transform(X)
    assert_true(Xt_mean.shape[1] == n_clusters)
    assert_true(Xt_median.shape[1] == n_clusters)
    assert_true(Xt_mean == np.array([1 / 3.]))
    assert_true(Xt_median == np.array([0.]))

    # Test inverse transform
    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
    X_full_median = agglo_median.inverse_transform(Xt_median)
    assert_true(np.unique(X_full_mean[0]).size == n_clusters)
    assert_true(np.unique(X_full_median[0]).size == n_clusters)

    assert_array_almost_equal(agglo_mean.transform(X_full_mean), Xt_mean)
    assert_array_almost_equal(agglo_median.transform(X_full_median),
                              Xt_median)

def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_warns(ChangedBehaviorWarning,
                                  search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_warns(ChangedBehaviorWarning, search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)
    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)

def test_skope_rules_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data
    y = iris.target
    y = (y != 0)

    # Test similarity_thres
    assert_raises(ValueError,
                  SkopeRules(similarity_thres=2).fit, X, y)
    assert_raises(ValueError,
                  SkopeRules(similarity_thres=0).fit, X, y)

    # Test max_samples
    assert_raises(ValueError,
                  SkopeRules(max_samples=-1).fit, X, y)
    assert_raises(ValueError,
                  SkopeRules(max_samples=0.0).fit, X, y)
    assert_raises(ValueError,
                  SkopeRules(max_samples=2.0).fit, X, y)
    # explicitly setting max_samples > n_samples should result in a warning
    assert_warns_message(
        UserWarning,
        "max_samples will be set to n_samples for estimation",
        SkopeRules(max_samples=1000).fit, X, y)
    assert_no_warnings(SkopeRules(max_samples=np.int64(2)).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples='foobar').fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=1.5).fit, X, y)
    assert_raises(ValueError, SkopeRules().fit(X, y).predict, X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).decision_function,
                  X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).rules_vote, X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).separate_rules_score,
                  X[:, 1:])

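# `iris` here (and in the IsolationForest test below) is a module-level
# fixture assumed from the surrounding suite, presumably something like:
# iris = load_iris()
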
def test_one_hot_encoder_invalid_params():
    enc = OneHotEncoder(drop='second')
    assert_raises_regex(ValueError, "Wrong input for parameter `drop`.",
                        enc.fit, [["Male"], ["Female"]])

    enc = OneHotEncoder(handle_unknown='ignore', drop='first')
    assert_raises_regex(ValueError, "`handle_unknown` must be 'error'",
                        enc.fit, [["Male"], ["Female"]])

    enc = OneHotEncoder(drop='first')
    assert_raises_regex(ValueError,
                        "The handling of integer data will change in version",
                        enc.fit, [[1], [2]])

    enc = OneHotEncoder(drop='first', categories='auto')
    assert_no_warnings(enc.fit_transform, [[1], [2]])

    enc = OneHotEncoder(drop=np.asarray('b', dtype=object))
    assert_raises_regex(ValueError, "Wrong input for parameter `drop`.",
                        enc.fit,
                        [['abc', 2, 55], ['def', 1, 55], ['def', 3, 59]])

    enc = OneHotEncoder(drop=['ghi', 3, 59])
    assert_raises_regex(ValueError,
                        "The following categories were supposed",
                        enc.fit,
                        [['abc', 2, 55], ['def', 1, 55], ['def', 3, 59]])

def test_no_empty_slice_warning():
    # test if we avoid numpy warnings for computing over empty arrays
    n_components = 10
    n_features = n_components + 2  # anything > n_comps triggered it in 0.16
    X = np.random.uniform(-1, 1, size=(n_components, n_features))
    pca = PCA(n_components=n_components)
    assert_no_warnings(pca.fit, X)

def test_mnb_prior_unobserved_targets():
    # test smoothing of prior for yet unobserved targets

    # Create toy training data
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])

    clf = MultinomialNB()

    assert_no_warnings(clf.partial_fit, X, y, classes=[0, 1, 2])

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 0

    # add a training example with previously unobserved class
    assert_no_warnings(clf.partial_fit, [[1, 1]], [2])

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 2

def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from "
               "version {0} when using version {1}. This might lead to "
               "breaking code or invalid results. "
               "Use at your own risk.".format("something",
                                              sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_other)

    # check that not including any version also works:
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = message.replace("something", "pre-0.18")
    message = message.replace("DecisionTreeClassifier", "TreeNoVersion")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)

    # check that no warning is raised for external estimators
    TreeNoVersion.__module__ = "notsklearn"
    assert_no_warnings(pickle.loads, tree_pickle_noversion)

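# `TreeNoVersion`, used above and in two tests further below, is likewise a
# suite-level helper not defined in this excerpt. A minimal sketch, assuming
# it only strips the version bookkeeping: it overrides `__getstate__` to
# return the bare `__dict__`, so its pickle carries no `_sklearn_version`
# entry, mimicking estimators pickled before scikit-learn 0.18.
from sklearn.tree import DecisionTreeClassifier


class TreeNoVersion(DecisionTreeClassifier):
    def __getstate__(self):
        return self.__dict__
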
# Note: n_classes and n_features are supplied via pytest parametrization in
# the surrounding suite; the decorator is not shown in this excerpt.
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    rng = check_random_state(0)
    n_samples = 10
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    for n_components in [max_components - 1, None, max_components]:
        # if n_components <= min(n_classes - 1, n_features), no warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    for n_components in [max_components + 1,
                         max(n_features, n_classes - 1) + 1]:
        # if n_components > min(n_classes - 1, n_features), raise warning
        # We test one unit higher than max_components, and then something
        # larger than both n_features and n_classes - 1 to ensure the test
        # works for any value of n_component
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        msg = ("n_components cannot be larger than min(n_features, "
               "n_classes - 1). Using min(n_features, "
               "n_classes - 1) = min(%d, %d - 1) = %d components."
               % (n_features, n_classes, max_components))
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        future_msg = ("In version 0.23, setting n_components > min("
                      "n_features, n_classes - 1) will raise a "
                      "ValueError. You should set n_components to None"
                      " (default), or a value smaller or equal to "
                      "min(n_features, n_classes - 1).")
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)

def test_return_train_score_warn_0_19():
    # Test that warnings are raised. Will be removed in 0.21
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    grid = {'C': [1, 2]}

    estimators = [GridSearchCV(LinearSVC(random_state=0), grid),
                  RandomizedSearchCV(LinearSVC(random_state=0), grid,
                                     n_iter=2)]

    result = {}
    for estimator in estimators:
        for val in [True, False, 'warn']:
            estimator.set_params(return_train_score=val)
            result[val] = assert_no_warnings(estimator.fit, X, y).cv_results_

    train_keys = ['split0_train_score', 'split1_train_score',
                  'split2_train_score', 'mean_train_score',
                  'std_train_score']
    for key in train_keys:
        msg = (
            'You are accessing a training score ({!r}), '
            'which will not be available by default '
            'any more in 0.21. If you need training scores, '
            'please set return_train_score=True').format(key)
        train_score = assert_warns_message(FutureWarning, msg,
                                           result['warn'].get, key)
        assert np.allclose(train_score, result[True][key])
        assert key not in result[False]

    for key in result['warn']:
        if key not in train_keys:
            assert_no_warnings(result['warn'].get, key)

def test_no_warning_for_zero_mse():
    # LassoLarsIC should not warn for log of zero MSE.
    y = np.arange(10, dtype=float)
    X = y.reshape(-1, 1)
    lars = linear_model.LassoLarsIC(normalize=False)
    assert_no_warnings(lars.fit, X, y)
    assert_true(np.any(np.isinf(lars.criterion_)))

def test_vectorizer_stop_words_inconsistent():
    if PY2:
        lstr = "[u'and', u'll', u've']"
    else:
        lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    for vec in [CountVectorizer(), TfidfVectorizer(), HashingVectorizer()]:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             ['hello world'])
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

        # Only one warning per stop list
        assert_no_warnings(vec.fit_transform, ['hello world'])
        assert _check_stop_words_consistency(vec) is None

        # Test caching of inconsistency assessment
        vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             ['hello world'])

def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from "
               "version {0} when using version {1}. This might lead to "
               "breaking code or invalid results. Use at your own "
               "risk.".format("something", sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_other)

    # check that not including any version also works:
    # don't do this at home, kids
    from sklearn.base import BaseEstimator
    old_getstate = BaseEstimator.__getstate__
    del BaseEstimator.__getstate__
    # tree lost its getstate, like pre-0.18
    assert_false(hasattr(tree, "__getstate__"))
    tree_pickle_noversion = pickle.dumps(tree)
    message = message.replace("something", "pre-0.18")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)
    BaseEstimator.__getstate__ = old_getstate

    # check that no warning is raised for external estimators
    DecisionTreeClassifier.__module__ = "notsklearn"
    assert_no_warnings(pickle.loads, tree_pickle_noversion)

def test_prf_average_compat():
    # Ensure warning if f1_score et al.'s average is implicit for multiclass
    y_true = [1, 2, 3, 3]
    y_pred = [1, 2, 3, 1]
    y_true_bin = [0, 1, 1]
    y_pred_bin = [0, 1, 0]

    for metric in [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]:
        score = assert_warns(DeprecationWarning, metric, y_true, y_pred)
        score_weighted = assert_no_warnings(metric, y_true, y_pred,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default')

        # check binary passes without warning
        assert_no_warnings(metric, y_true_bin, y_pred_bin)

        # but binary with pos_label=None should behave like multiclass
        score = assert_warns(DeprecationWarning, metric,
                             y_true_bin, y_pred_bin, pos_label=None)
        score_weighted = assert_no_warnings(metric, y_true_bin, y_pred_bin,
                                            pos_label=None,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default with '
                     'binary data and pos_label=None')

def test_gamma_auto():
    X, y = [[0.0, 1.2], [1.0, 1.3]], [0, 1]

    msg = ("The default value of gamma will change from 'auto' to 'scale' in "
           "version 0.22 to account better for unscaled features. Set gamma "
           "explicitly to 'auto' or 'scale' to avoid this warning.")

    assert_warns_message(FutureWarning, msg, svm.SVC().fit, X, y)
    assert_no_warnings(svm.SVC(kernel='linear').fit, X, y)
    assert_no_warnings(svm.SVC(kernel='precomputed').fit, X, y)

def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup

def test_gamma_scale():
    X, y = [[0.], [1.]], [0, 1]

    clf = svm.SVC(gamma='scale')
    assert_no_warnings(clf.fit, X, y)
    assert_equal(clf._gamma, 2.)

    # X_std ~= 1 shouldn't raise warning, for when
    # gamma is not explicitly set.
    X, y = [[1, 2], [3, 2 * np.sqrt(6) / 3 + 2]], [0, 1]
    assert_no_warnings(clf.fit, X, y)

def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log,
                                      check_inverse=True)
    assert_warns_message(UserWarning, "The provided functions or transformer"
                         " are not strictly inverse of each other.",
                         regr.fit, X, y)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log)
    regr.set_params(check_inverse=False)
    assert_no_warnings(regr.fit, X, y)

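# `friedman` is a module-level fixture assumed from the surrounding suite,
# presumably an (X, y) pair such as:
# friedman = datasets.make_friedman1(random_state=0)
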
def test_recall_warnings():
    assert_no_warnings(recall_score,
                       np.array([[1, 1], [1, 1]]),
                       np.array([[0, 0], [0, 0]]),
                       average='micro')
    clean_warning_registry()
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        recall_score(np.array([[0, 0], [0, 0]]),
                     np.array([[1, 1], [1, 1]]),
                     average='micro')
        assert_equal(str(record.pop().message),
                     'Recall is ill-defined and '
                     'being set to 0.0 due to no true samples.')

def test_recall_warnings():
    assert_no_warnings(recall_score,
                       np.array([[1, 1], [1, 1]]),
                       np.array([[0, 0], [0, 0]]),
                       average='micro')

    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        recall_score(np.array([[0, 0], [0, 0]]),
                     np.array([[1, 1], [1, 1]]),
                     average='micro')
        assert_equal(str(record.pop().message),
                     'Recall is ill-defined and '
                     'being set to 0.0 due to no true samples.')

def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples
    assert_raises(ValueError,
                  IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=2.0).fit, X)
    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning.
    assert_warns_message(
        UserWarning,
        "max_samples will be set to n_samples for estimation",
        IsolationForest(max_samples=1000).fit, X)
    assert_no_warnings(IsolationForest(max_samples='auto').fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples='foobar').fit, X)

def test_precision_warnings():
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        precision_score(np.array([[1, 1], [1, 1]]),
                        np.array([[0, 0], [0, 0]]),
                        average='micro')
        assert_equal(str(record.pop().message),
                     'Precision is ill-defined and '
                     'being set to 0.0 due to no predicted samples.')

    assert_no_warnings(precision_score,
                       np.array([[0, 0], [0, 0]]),
                       np.array([[1, 1], [1, 1]]),
                       average='micro')

def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameter checking in the public function
    nnmf = non_negative_factorization
    assert_no_warnings(nnmf, A, A, A, np.int64(1))
    msg = ("Number of components must be a positive integer; "
           "got (n_components=1.5)")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5)
    msg = ("Number of components must be a positive integer; "
           "got (n_components='2')")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, "2")
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, "custom")
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, "custom")
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, "custom")

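# For readability: the positional arguments above appear to map onto
# non_negative_factorization(X, W, H, n_components, init), so a call like
# nnmf(A, A, A, 2, "custom") factorizes A starting from user-supplied
# initial W and H.
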
def test_check_increasing_up_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert is_increasing

def test_check_increasing_up():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1.5, 2.77, 8.99, 8.99, 50]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert is_increasing

def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S,
        preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)

def test_check_increasing_down_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, -2, -3, -4, -5]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_false(is_increasing)

def test_check_increasing_up():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1.5, 2.77, 8.99, 8.99, 50]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_true(is_increasing)

def test_check_increasing_down():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1.5, -2.77, -8.99, -8.99, -50]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert not is_increasing

def test_check_increasing_down_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, -2, -3, -4, -5]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert not is_increasing

def test_check_increasing_up_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_true(is_increasing)

def test_prf_average_compat():
    """Ensure warning if f1_score et al.'s average is implicit for
    multiclass."""
    y_true = [1, 2, 3, 3]
    y_pred = [1, 2, 3, 1]

    for metric in [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]:
        score = assert_warns(DeprecationWarning, metric, y_true, y_pred)
        score_weighted = assert_no_warnings(metric, y_true, y_pred,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default')

        # check binary passes without warning
        assert_no_warnings(metric, [0, 1, 1], [0, 1, 0])

def test_check_increasing_down():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1.5, -2.77, -8.99, -8.99, -50]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_false(is_increasing)

def test_regressormixin_score_multioutput():
    from sklearn.linear_model import LinearRegression

    # no warnings when y_type is continuous
    X = [[1], [2], [3]]
    y = [1, 2, 3]
    reg = LinearRegression().fit(X, y)
    assert_no_warnings(reg.score, X, y)

    # warn when y_type is continuous-multioutput
    y = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To use the new default, "
           "please either call 'metrics.r2_score' directly or "
           "make a custom scorer with 'metrics.make_scorer'.")
    assert_warns_message(FutureWarning, msg, reg.score, X, y)

def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert_equal(mdl.n_iter_, mdl.max_iter)

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert_equal(mdl.n_iter_, mdl.max_iter)

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)

def test_sensitivity_specificity_score_binary():
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    sen, spe, sup = sensitivity_specificity_support(y_true, y_pred,
                                                    average=None)
    assert_allclose(sen, [0.88, 0.68], rtol=R_TOL)
    assert_allclose(spe, [0.68, 0.88], rtol=R_TOL)
    assert_array_equal(sup, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the
    # positive class (e.g. label == 1). This is deprecated for
    # average != 'binary'.
    for kwargs in ({}, {'average': 'binary'}):
        sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs)
        assert sen == pytest.approx(0.68, rel=R_TOL)

        spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs)
        assert spe == pytest.approx(0.88, rel=R_TOL)

def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored)