def test_fetch_openml_iris(monkeypatch, gzip_response):
    # classification dataset with numeric only columns
    data_id = 61
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)

    # Keyword arguments forwarded verbatim to _fetch_dataset_from_openml.
    fetch_kwargs = {
        'data_id': data_id,
        'data_name': 'iris',
        'data_version': 1,
        'target_column': 'class',
        'expected_observations': 150,
        'expected_features': 4,
        'expected_missing': 0,
        'expect_sparse': False,
        'expected_data_dtype': np.float64,
        'expected_target_dtype': object,
        'compare_default_target': True,
    }
    multiple_versions_msg = (
        "Multiple active versions of the dataset matching the name"
        " iris exist. Versions may be fundamentally different, "
        "returning version 1."
    )
    assert_warns_message(UserWarning, multiple_versions_msg,
                         _fetch_dataset_from_openml, **fetch_kwargs)
def test_kfold_valueerrors():
    # Check the errors and warnings raised by KFold / StratifiedKFold for
    # invalid or degenerate configurations.
    X1 = np.array([[1, 2], [3, 4], [5, 6]])
    X2 = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, next, KFold(4).split(X1))

    # Check that a warning is raised if the least populated class has too few
    # members.
    y = np.array([3, 3, -1, -1, 2])

    skf_3 = StratifiedKFold(3)
    assert_warns_message(Warning, "The least populated class",
                         next, skf_3.split(X2, y))

    # Check that despite the warning the folds are still computed even
    # though all the classes are not necessarily represented on each
    # side of the split at each split.
    with warnings.catch_warnings():
        # catch_warnings() alone only saves/restores the filter state; the
        # explicit "ignore" filter is what actually silences the warning
        # asserted just above while coverage is checked.
        warnings.simplefilter("ignore")
        check_cv_coverage(skf_3, X2, y, labels=None, expected_n_iter=3)

    # Error when number of folds is <= 1
    assert_raises(ValueError, KFold, 0)
    assert_raises(ValueError, KFold, 1)
    assert_raises(ValueError, StratifiedKFold, 0)
    assert_raises(ValueError, StratifiedKFold, 1)

    # When n_folds is not integer:
    assert_raises(ValueError, KFold, 1.5)
    assert_raises(ValueError, KFold, 2.0)
    assert_raises(ValueError, StratifiedKFold, 1.5)
    assert_raises(ValueError, StratifiedKFold, 2.0)

    # When shuffle is not a bool:
    assert_raises(TypeError, KFold, n_folds=4, shuffle=None)
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    n_samples = 10
    rng = check_random_state(0)
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    n_repeats = n_samples // n_classes + 1
    y = np.tile(range(n_classes), n_repeats)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    # if n_components <= min(n_classes - 1, n_features), no warning
    accepted = (max_components - 1, None, max_components)
    for n_components in accepted:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    # The expected messages do not depend on the n_components under test.
    msg = ("n_components cannot be larger than min(n_features, "
           "n_classes - 1). Using min(n_features, "
           "n_classes - 1) = min(%d, %d - 1) = %d components." %
           (n_features, n_classes, max_components))
    future_msg = ("In version 0.23, setting n_components > min("
                  "n_features, n_classes - 1) will raise a "
                  "ValueError. You should set n_components to None"
                  " (default), or a value smaller or equal to "
                  "min(n_features, n_classes - 1).")

    # if n_components > min(n_classes - 1, n_features), raise warning
    # We test one unit higher than max_components, and then something
    # larger than both n_features and n_classes - 1 to ensure the test
    # works for any value of n_component
    rejected = (max_components + 1, max(n_features, n_classes - 1) + 1)
    for n_components in rejected:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
def test_load_lfw_pairs_deprecation():
    # Calling load_lfw_pairs must emit its DeprecationWarning.
    # NOTE(review): there is no space between "0.19." and "Use" in the
    # expected text; presumably this mirrors the emitted message - confirm
    # before "fixing" it.
    expected = (
        "Function 'load_lfw_pairs' has been deprecated in 0.17 and will be "
        "removed in 0.19."
        "Use fetch_lfw_pairs(download_if_missing=False) instead."
    )
    assert_warns_message(DeprecationWarning, expected, load_lfw_pairs,
                         data_home=SCIKIT_LEARN_DATA)
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    object_frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    conversion_msg = ("Data with input dtype object were all converted to "
                      "float64.")
    assert_warns_message(DataConversionWarning, conversion_msg,
                         check_array, object_frame,
                         dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, object_frame,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, object_frame, dtype='object',
                       warn_on_dtype=True)

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    mixed_frame = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    for requested_dtype in (np.float64, 'numeric', object):
        assert_warns(DataConversionWarning, check_array, mixed_frame,
                     dtype=requested_dtype, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    mixed_numeric_frame = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, mixed_numeric_frame,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, mixed_numeric_frame.astype(int),
                       dtype='numeric', warn_on_dtype=True)
def test_raw_values_deprecation():
    # decision_function(raw_values=True) must emit a DeprecationWarning.
    X = [[0.0], [1.0]]
    fitted = EllipticEnvelope().fit(X)
    expected = ("raw_values parameter is deprecated in 0.20 and will"
                " be removed in 0.22.")
    assert_warns_message(DeprecationWarning, expected,
                         fitted.decision_function, X, raw_values=True)
def test_deprecated_auc_reorder():
    # Passing reorder=True to auc must emit a DeprecationWarning.
    x_values, y_values = [1, 2], [2, 3]
    depr_message = ("The 'reorder' parameter has been deprecated in version "
                    "0.20 and will be removed in 0.22. It is recommended not "
                    "to set 'reorder' and ensure that x is monotonic "
                    "increasing or monotonic decreasing.")
    assert_warns_message(DeprecationWarning, depr_message, auc,
                         x_values, y_values, reorder=True)
def test_deprecated_calinski_harabaz_score():
    # The old (misspelled) function name must emit a DeprecationWarning.
    samples = np.ones((10, 2))
    cluster_labels = [0] * 5 + [1] * 5
    depr_message = ("Function 'calinski_harabaz_score' has been renamed "
                    "to 'calinski_harabasz_score' "
                    "and will be removed in version 0.23.")
    assert_warns_message(DeprecationWarning, depr_message,
                         calinski_harabaz_score, samples, cluster_labels)
def test_sample_weight_warning():
    # Calibrating a LinearSVC with sample weights must warn, and the weights
    # must still change the calibrated probabilities.
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))

    # First half is used for training, second half for prediction.
    X_train = X[:n_samples]
    y_train = y[:n_samples]
    sw_train = sample_weight[:n_samples]
    X_test = X[n_samples:]

    # LinearSVC does not currently support sample weights but they
    # can still be used for the calibration step (with a warning)
    msg = "LinearSVC does not support sample_weight."

    for method in ('sigmoid', 'isotonic'):
        calibrated_clf = CalibratedClassifierCV(LinearSVC(random_state=42),
                                                method=method)
        assert_warns_message(UserWarning, msg, calibrated_clf.fit,
                             X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # a different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        assert_greater(np.linalg.norm(probs_with_sw - probs_without_sw), 0.1)
def test_wishart_log_det():
    # Calling wishart_log_det must emit its DeprecationWarning.
    a = np.array([0.1, 0.8, 0.01, 0.09])
    b = np.array([0.2, 0.7, 0.05, 0.1])
    expected = ("The function "
                "wishart_log_det is deprecated in 0.18 and"
                " will be removed in 0.20.")
    assert_warns_message(DeprecationWarning, expected,
                         wishart_log_det, a, b, 2, 4)
def test_rfe_deprecation_estimator_params():
    # Fitting RFE / RFECV built with estimator_params must emit a
    # DeprecationWarning.
    deprecation_message = ("The parameter 'estimator_params' is deprecated as "
                           "of version 0.16 and will be removed in 0.18. The "
                           "parameter is no longer necessary because the "
                           "value is set via the estimator initialisation or "
                           "set_params method.")
    generator = check_random_state(0)
    iris = load_iris()
    # Iris features with 6 extra columns drawn from generator.normal.
    extra_columns = generator.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, extra_columns]
    y = iris.target

    rfe = RFE(estimator=SVC(), n_features_to_select=4, step=0.1,
              estimator_params={"kernel": "linear"})
    assert_warns_message(DeprecationWarning, deprecation_message,
                         rfe.fit, X=X, y=y)

    rfecv = RFECV(estimator=SVC(), step=1, cv=5,
                  estimator_params={"kernel": "linear"})
    assert_warns_message(DeprecationWarning, deprecation_message,
                         rfecv.fit, X=X, y=y)
def test_fetch_openml_australian(monkeypatch, gzip_response):
    # sparse dataset
    # Australian is the only sparse dataset that is reasonably small
    # as it is inactive, we need to catch the warning. Due to mocking
    # framework, it is not deactivated in our tests
    data_id = 292
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)

    # Keyword arguments forwarded verbatim to _fetch_dataset_from_openml.
    fetch_kwargs = {
        'data_id': data_id,
        'data_name': 'Australian',
        'data_version': 1,
        'target_column': 'Y',
        # Not all original instances included for space reasons
        'expected_observations': 85,
        'expected_features': 14,
        'expected_missing': 0,
        'expect_sparse': True,
        'expected_data_dtype': np.float64,
        'expected_target_dtype': object,
        'compare_default_target': False,  # numpy specific check
    }
    assert_warns_message(UserWarning,
                         "Version 1 of dataset Australian is inactive,",
                         _fetch_dataset_from_openml, **fetch_kwargs)
def test_redundant_bins(strategy, expected_bin_edges):
    # Fitting must warn about too-narrow bins and leave the expected edges
    # for feature 0.
    X = [[0], [0], [0], [0], [3], [3]]
    discretizer = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins."
    )
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)
    assert_array_almost_equal(discretizer.bin_edges_[0], expected_bin_edges)
def test_label_binarize_multilabel():
    # Generator-style test: yields one check per indicator representation,
    # then checks the sequence-of-sequences deprecation and an invalid
    # sparse configuration.
    # NOTE(review): yield-based tests are presumably only collected by
    # nose-style runners - confirm which runner executes this file.
    y_seq = [(1,), (0, 1, 2), ()]
    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
    classes = [0, 1, 2]
    pos_label, neg_label = 2, 0
    expected = pos_label * y_ind

    sparse_formats = (coo_matrix, csc_matrix, csr_matrix, dok_matrix,
                      lil_matrix)
    indicators = [y_ind]
    indicators.extend(to_sparse(y_ind) for to_sparse in sparse_formats)

    for y in indicators:
        yield (check_binarized_results, y, classes, pos_label, neg_label,
               expected)

    deprecation_message = ("Direct support for sequence of sequences "
                           "multilabel representation will be unavailable "
                           "from version 0.17. Use sklearn.preprocessing."
                           "MultiLabelBinarizer to convert to a label "
                           "indicator representation.")
    assert_warns_message(DeprecationWarning, deprecation_message,
                         check_binarized_results, y_seq, classes, pos_label,
                         neg_label, expected)

    # `y` is still bound to the last indicator yielded above.
    assert_raises(ValueError, label_binarize, y, classes, neg_label=-1,
                  pos_label=pos_label, sparse_output=True)
def test_threshold_deprecation():
    # Reading the threshold_ attribute must emit a DeprecationWarning.
    X = [[0.0], [1.0]]
    fitted = EllipticEnvelope().fit(X)
    expected = ("threshold_ attribute is deprecated in 0.20 and will"
                " be removed in 0.22.")
    assert_warns_message(DeprecationWarning, expected,
                         getattr, fitted, "threshold_")
def test_mcd_increasing_det_warning():
    # Check that a warning is raised if we observe increasing determinants
    # during the c_step. In theory the sequence of determinants should be
    # decreasing. Increasing determinants are likely due to ill-conditioned
    # covariance matrices that result in poor precision matrices.
    X = [
        [5.1, 3.5, 1.4, 0.2],
        [4.9, 3.0, 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5.0, 3.6, 1.4, 0.2],
        [4.6, 3.4, 1.4, 0.3],
        [5.0, 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3.0, 1.4, 0.1],
        [4.3, 3.0, 1.1, 0.1],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [4.6, 3.6, 1.0, 0.2],
        [5.0, 3.0, 1.6, 0.2],
        [5.2, 3.5, 1.5, 0.2],
    ]
    estimator = MinCovDet(random_state=1)
    assert_warns_message(RuntimeWarning, "Determinant has increased",
                         estimator.fit, X)
def test_nystroem_callable():
    # Test Nystroem on a callable.
    n_samples = 10
    rnd = np.random.RandomState(42)
    # list() rather than ndarray: test input validation
    X = list(rnd.uniform(size=(n_samples, 4)))

    def logging_histogram_kernel(x, y, log):
        """Histogram kernel that writes to a log."""
        log.append(1)
        return np.minimum(x, y).sum()

    def linear_kernel(X, Y):
        return np.dot(X, Y.T)

    kernel_log = []
    transformer = Nystroem(kernel=logging_histogram_kernel,
                           n_components=(n_samples - 1),
                           kernel_params={'log': kernel_log})
    transformer.fit(X)
    # One log entry is expected per kernel evaluation.
    assert_equal(len(kernel_log), n_samples * (n_samples - 1) / 2)

    # if degree, gamma or coef0 is passed, we raise a warning
    msg = "Passing gamma, coef0 or degree to Nystroem"
    for extra_param in ({'gamma': 1}, {'coef0': 1}, {'degree': 2}):
        ny = Nystroem(kernel=linear_kernel, **extra_param)
        assert_warns_message(DeprecationWarning, msg, ny.fit, X)
def test_convergence_warning(dataset, algo_class):
    # With only two iterations allowed, fit must emit a ConvergenceWarning
    # that names the estimator class.
    X, y = dataset
    model = algo_class(max_iter=2, verbose=True)
    expected = '[{0}] {0} did not converge'.format(model.__class__.__name__)
    assert_warns_message(ConvergenceWarning, expected, model.fit, X, y)
def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from "
               "version {0} when using version {1}. This might lead to "
               "breaking code or invalid results. "
               "Use at your own risk.".format("something",
                                              sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_other)

    # check that not including any version also works:
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = message.replace("something", "pre-0.18")
    message = message.replace("DecisionTreeClassifier", "TreeNoVersion")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)

    # check that no warning is raised for external estimators.
    # The class attribute is restored afterwards so that the override does
    # not leak into other tests that use TreeNoVersion.
    module_backup = TreeNoVersion.__module__
    try:
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup
def test_vectorizer_stop_words_inconsistent():
    # Stop words that do not survive the vectorizer's own tokenization must
    # trigger a UserWarning.
    # The repr of the offending tokens carries a u-prefix under Python 2.
    lstr = "[u'and', u'll', u've']" if PY2 else "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    documents = ['hello world']

    vectorizers = [CountVectorizer(), TfidfVectorizer(), HashingVectorizer()]
    for vec in vectorizers:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             documents)
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

    # The checks below run on the last vectorizer from the loop.
    # Only one warning per stop list
    assert_no_warnings(vec.fit_transform, documents)
    assert _check_stop_words_consistency(vec) is None

    # Test caching of inconsistency assessment
    vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
    assert_warns_message(UserWarning, message, vec.fit_transform, documents)
def test_affinity_propagation_equal_mutual_similarities():
    # Two samples with mutually equal similarities: check the warning and the
    # resulting clustering for several preference settings.
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)
    equal_msg = "mutually equal"

    # setting preference > similarity
    centers, assignments = assert_warns_message(
        UserWarning, equal_msg, affinity_propagation, S, preference=0)
    # expect every sample to become an exemplar
    assert_array_equal([0, 1], centers)
    assert_array_equal([0, 1], assignments)

    # setting preference < similarity
    centers, assignments = assert_warns_message(
        UserWarning, equal_msg, affinity_propagation, S, preference=-10)
    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], centers)
    assert_array_equal([0, 0], assignments)

    # setting different preferences
    centers, assignments = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])
    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], centers)
    assert_array_equal([0, 0], assignments)
def test_multinomial_logistic_regression_with_classweight_auto():
    # 'auto' is deprecated and will be removed in 0.19
    features = iris.data
    targets = iris.target
    model = LogisticRegression(multi_class='multinomial',
                               class_weight='auto', solver='lbfgs')
    expected = "class_weight='auto' heuristic is deprecated"
    assert_warns_message(DeprecationWarning, expected,
                         model.fit, features, targets)
def test_convergence_warning():
    # A single iteration must make fit emit a UserWarning mentioning
    # "converge".
    degree = 4
    targets = _lifted_predict(U[:degree], X)
    regressor = PolynomialNetworkRegressor(degree=degree,
                                           n_components=n_components,
                                           beta=1e-10, max_iter=1, tol=1e-5,
                                           random_state=0)
    assert_warns_message(UserWarning, "converge", regressor.fit, X, targets)
def test_clone_copy_init_params():
    # test for deprecation warning when copying or casting an init parameter
    estimator = ModifyInitParams()
    expected = ("Estimator ModifyInitParams modifies parameters in __init__. "
                "This behavior is deprecated as of 0.18 and support "
                "for this behavior will be removed in 0.20.")
    assert_warns_message(DeprecationWarning, expected, clone, estimator)
def test_pickle_version_warning_is_issued_upon_different_version():
    # Unpickling a TreeBadVersion estimator must emit the version-mismatch
    # UserWarning.
    iris = datasets.load_iris()
    fitted_tree = TreeBadVersion().fit(iris.data, iris.target)
    payload = pickle.dumps(fitted_tree)
    expected = pickle_error_message.format(
        estimator="TreeBadVersion",
        old_version="something",
        current_version=sklearn.__version__,
    )
    assert_warns_message(UserWarning, expected, pickle.loads, payload)
def test_repeated_x(minimizer):
    # Both runs must emit a UserWarning containing "has been evaluated at".
    repeated_msg = "has been evaluated at"

    def first_coordinate(x):
        return x[0]

    # Integer-valued dimension.
    assert_warns_message(UserWarning, repeated_msg, minimizer,
                         first_coordinate,
                         dimensions=[[0, 1]], x0=[[0], [1]],
                         n_random_starts=0, n_calls=3)

    # String-valued dimension.
    assert_warns_message(UserWarning, repeated_msg, minimizer,
                         bench4,
                         dimensions=[("0", "1")], x0=[["0"], ["1"]],
                         n_calls=3, n_random_starts=0)
def test_future_warning():
    # Each listed score function must emit a FutureWarning starting with
    # "The behavior of ".
    labels_true = [0, 0, 0]
    labels_pred = [0, 0, 0]
    warning_msg = "The behavior of "
    score_funcs_with_changing_means = (normalized_mutual_info_score,
                                       adjusted_mutual_info_score)
    for score_func in score_funcs_with_changing_means:
        assert_warns_message(FutureWarning, warning_msg, score_func,
                             labels_true, labels_pred)
def test_spectral_embeding_import():
    # Both the function and the class must emit a DeprecationWarning when
    # called.
    random_state = np.random.RandomState(36)
    sims = rbf_kernel(random_state.randn(10, 30))
    assert_warns_message(DeprecationWarning,
                         "spectral_embedding is deprecated",
                         spectral_embedding, sims)
    assert_warns_message(DeprecationWarning,
                         "SpectralEmbedding is deprecated",
                         SpectralEmbedding)
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    # Fetching dataset 3 must surface the OpenML warning as a UserWarning.
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    expected = ("OpenML raised a warning on the dataset. It might be unusable. "
                "Warning:")
    assert_warns_message(UserWarning, expected, fetch_openml,
                         data_id=data_id, cache=False)
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    # Fetching dataset 1 must surface the OpenML error as a UserWarning.
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    expected = (
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:"
    )
    assert_warns_message(UserWarning, expected, fetch_openml,
                         data_id=data_id, cache=False)
def test_select_kbest_zero():
    # Test whether k=0 correctly returns no features.
    X, y = make_classification(n_samples=20, n_features=10,
                               shuffle=False, random_state=0)

    selector = SelectKBest(f_classif, k=0)
    selector.fit(X, y)
    # No feature may be flagged as selected.
    assert_array_equal(selector.get_support(), np.zeros(10, dtype=bool))

    # transform warns and returns an array with zero columns.
    X_selected = assert_warns_message(UserWarning,
                                      'No features were selected',
                                      selector.transform, X)
    assert X_selected.shape == (20, 0)
def check_regressors_no_decision_function(name, Regressor):
    # checks whether regressors have decision_function or predict_proba
    rng = np.random.RandomState(0)
    X = rng.normal(size=(10, 4))
    y = multioutput_estimator_convert_y_2d(name, X[:, 0])

    regressor = Regressor()
    set_fast_parameters(regressor)
    if hasattr(regressor, "n_components"):
        # FIXME CCA, PLS is not robust to rank 1 effects
        regressor.n_components = 1
    regressor.fit(X, y)

    for func_name in ("decision_function", "predict_proba",
                      "predict_log_proba"):
        func = getattr(regressor, func_name, None)
        if func is not None:
            # has function. Should raise deprecation warning; the expected
            # message is the method name itself.
            assert_warns_message(DeprecationWarning, func_name, func, X)
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    forest_classes = (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor)

    for Estimator in forest_classes:
        unfitted = Estimator(min_impurity_split=0.1)
        # assert_warns_message hands back whatever fit returns.
        est = assert_warns_message(DeprecationWarning,
                                   "min_impurity_decrease",
                                   unfitted.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)
def test_vectorizer_stop_words_inconsistent():
    # Stop words that do not survive the vectorizer's own tokenization must
    # trigger a UserWarning.
    lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    documents = ['hello world']

    vectorizers = [CountVectorizer(), TfidfVectorizer(), HashingVectorizer()]
    for vec in vectorizers:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             documents)
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

    # The checks below run on the last vectorizer from the loop.
    # Only one warning per stop list
    assert_no_warnings(vec.fit_transform, documents)
    assert _check_stop_words_consistency(vec) is None

    # Test caching of inconsistency assessment
    vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
    assert_warns_message(UserWarning, message, vec.fit_transform, documents)
def test_candidates():
    # Checks whether candidates are sufficient.
    # This should handle the cases when number of candidates is 0.
    # User should be warned when number of candidates is less than
    # requested number of neighbors.
    X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
                        [6, 10, 2]], dtype=np.float32)
    X_test = np.array([7, 10, 3], dtype=np.float32).reshape(1, -1)

    message_template = ("Number of candidates is not sufficient to retrieve"
                        " %i neighbors with"
                        " min_hash_match = %i. Candidates are filled up"
                        " uniformly from unselected"
                        " indices.")

    # (min_hash_match, n_neighbors): first for zero candidates, then for
    # candidates less than n_neighbors.
    scenarios = ((32, 3), (31, 5))
    for min_hash_match, n_neighbors in scenarios:
        make_forest = ignore_warnings(LSHForest, category=DeprecationWarning)
        lshf = make_forest(min_hash_match=min_hash_match)
        ignore_warnings(lshf.fit)(X_train)

        message = message_template % (n_neighbors, min_hash_match)
        assert_warns_message(UserWarning, message, lshf.kneighbors,
                             X_test, n_neighbors=n_neighbors)
        distances, neighbors = lshf.kneighbors(X_test,
                                               n_neighbors=n_neighbors)
        assert_equal(distances.shape[1], n_neighbors)
def test_lda_dimension_warning(n_classes, n_features):
    # n_components above min(n_features, n_classes - 1) must make fit emit
    # a ChangedBehaviorWarning; values up to that bound must not warn.
    n_samples = 10
    rng = check_random_state(0)
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    n_repeats = n_samples // n_classes + 1
    y = np.tile(range(n_classes), n_repeats)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    # if n_components <= min(n_classes - 1, n_features), no warning
    accepted = (max_components - 1, None, max_components)
    for n_components in accepted:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    # The expected message does not depend on the n_components under test.
    msg = ("n_components cannot be superior to min(n_features, "
           "n_classes - 1). Using min(n_features, "
           "n_classes - 1) = min(%d, %d - 1) = %d components." %
           (n_features, n_classes, max_components))

    # if n_components > min(n_classes - 1, n_features), raise warning
    rejected = (max_components + 1, max(n_features, n_classes - 1) + 1)
    for n_components in rejected:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
def test_deprecated_grid_search_iid(self):
    # The `iid` deprecation warning must only appear when the test folds
    # have unequal sizes.
    depr_message = ("The default of the `iid` parameter will change from True "
                    "to False in version 0.22")
    X, y = make_blobs(n_samples=54, random_state=0, centers=2)

    def build_search(cv):
        # Same estimator and grid every time; only the CV scheme varies.
        return GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=cv)

    # no warning with equally sized test sets
    grid = build_search(3)
    assert_no_warnings(grid.fit, X, y)

    # warning because 54 % 5 != 0
    grid = build_search(5)
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    # warning because stratification into two classes and 27 % 2 != 0
    grid = build_search(2)
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    # no warning because no stratification and 54 % 2 == 0
    grid = build_search(KFold(2))
    assert_no_warnings(grid.fit, X, y)
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameters checking is public function
    nnmf = non_negative_factorization

    init_msg = ("The default value of init will change from "
                "random to None in 0.23 to make it consistent "
                "with decomposition.NMF.")
    assert_warns_message(FutureWarning, init_msg, nnmf, A, A, A, np.int64(1))

    # Each entry: (expected ValueError message, positional arguments).
    invalid_calls = [
        ("Number of components must be a positive integer; "
         "got (n_components=1.5)",
         (A, A, A, 1.5, 'random')),
        ("Number of components must be a positive integer; "
         "got (n_components='2')",
         (A, A, A, '2', 'random')),
        ("Negative values in data passed to NMF (input H)",
         (A, A, -A, 2, 'custom')),
        ("Negative values in data passed to NMF (input W)",
         (A, -A, A, 2, 'custom')),
        ("Array passed to NMF (input H) is full of zeros",
         (A, A, 0 * A, 2, 'custom')),
        ("Invalid regularization parameter: got 'spam' instead of one of",
         (A, A, 0 * A, 2, 'custom', True, 'cd', 2., 1e-4, 200, 0., 0.,
          'spam')),
    ]
    for msg, call_args in invalid_calls:
        assert_raise_message(ValueError, msg, nnmf, *call_args)
def test_skope_rules_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data
    y = iris.target != 0

    # Test max_samples
    for bad_max_samples in (-1, 0.0, 2.0):
        assert_raises(ValueError,
                      SkopeRules(max_samples=bad_max_samples).fit, X, y)

    # explicitly setting max_samples > n_samples should result in a warning.
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         SkopeRules(max_samples=1000).fit, X, y)
    assert_no_warnings(SkopeRules(max_samples=np.int64(2)).fit, X, y)

    for bad_max_samples in ('foobar', 1.5):
        assert_raises(ValueError,
                      SkopeRules(max_samples=bad_max_samples).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_depth_duplication=1.5).fit, X, y)

    # Each method must raise ValueError when given X without its first
    # column.
    X_fewer_columns = X[:, 1:]
    for method_name in ('predict', 'decision_function', 'rules_vote',
                        'score_top_rules'):
        fitted = SkopeRules().fit(X, y)
        assert_raises(ValueError, getattr(fitted, method_name),
                      X_fewer_columns)
def test_one_hot_encoder_deprecationwarnings():
    # Check the warnings emitted by OneHotEncoder for the default settings,
    # the deprecated attributes and the deprecated keywords.
    integer_msg = "handling of integer"
    res = [[0., 1., 0., 1., 1.],
           [1., 0., 1., 0., 1.]]
    inputs = ([[3, 2, 1], [0, 1, 1]],
              [[3., 2., 1.], [0., 1., 1.]])

    for X in inputs:
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, integer_msg, enc.fit, X)
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, integer_msg, enc.fit_transform, X)

        # check it still works correctly as well
        with ignore_warnings(category=FutureWarning):
            X_trans = enc.fit_transform(X).toarray()
        assert_array_equal(X_trans, res)

        # check deprecated attributes
        for attribute in ('active_features_', 'feature_indices_',
                          'n_values_'):
            assert_warns(DeprecationWarning, getattr, enc, attribute)

        # check no warning is raised if keyword is specified
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit, X)
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit_transform, X)
        X_trans = enc.fit_transform(X).toarray()
        assert_array_equal(X_trans, res)

        # check there is also a warning if the default is passed
        enc = OneHotEncoder(n_values='auto', handle_unknown='ignore')
        assert_warns(DeprecationWarning, enc.fit, X)

    # Object-dtype column combined with the deprecated categorical_features.
    X = np.array([['cat1', 'cat2']], dtype=object).T
    enc = OneHotEncoder(categorical_features='all')
    assert_warns(DeprecationWarning, enc.fit, X)
def test_check_inverse():
    # check_inverse=True must warn when func and inverse_func are not
    # inverses of each other, and stay silent otherwise.
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    not_inverse_msg = ("The provided functions are not strictly"
                       " inverse of each other. If you are sure you"
                       " want to proceed regardless, set"
                       " 'check_inverse=False'.")

    candidates = [X_dense,
                  sparse.csr_matrix(X_dense),
                  sparse.csc_matrix(X_dense)]
    for X in candidates:
        # Sparse input is only accepted when explicitly enabled.
        accept_sparse = sparse.issparse(X)

        # sqrt / around are not inverses of each other: expect the warning.
        trans = FunctionTransformer(func=np.sqrt, inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True)
        assert_warns_message(UserWarning, not_inverse_msg, trans.fit, X)

        # expm1 / log1p are proper inverses: no warning, exact round trip.
        trans = FunctionTransformer(func=np.expm1, inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    trans = FunctionTransformer(func=np.expm1, inverse_func=None,
                                check_inverse=True)
    assert_no_warnings(trans.fit, X_dense)
    trans = FunctionTransformer(func=None, inverse_func=np.expm1,
                                check_inverse=True)
    assert_no_warnings(trans.fit, X_dense)
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)

    # Record every warning except DeprecationWarning and require that none
    # was emitted. warnings.catch_warnings(record=True) is used instead of
    # pytest.warns(None), which newer pytest releases no longer accept.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0, [str(w.message) for w in record]

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)

    # Same "no warning other than DeprecationWarning" check for an
    # all-integer frame.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0, [str(w.message) for w in record]
def test_lmvnpdf_spherical():
    # The deprecated log_multivariate_normal_density must warn and, for
    # 'spherical' covariances, agree with the naive diagonal reference.
    n_features = 2
    n_components = 3
    n_samples = 10

    # The order of the draws from the module-level rng is kept unchanged.
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    spherecv = rng.rand(n_components, 1) ** 2 + 1
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    # Tile the spherical values to the layout the diagonal reference takes.
    cv = np.tile(spherecv, (n_features, 1))
    reference = _naive_lmvnpdf_diag(X, mu, cv)

    deprecation_msg = ("The function"
                       " log_multivariate_normal_density is "
                       "deprecated in 0.18 and will be removed in 0.20.")
    lpr = assert_warns_message(DeprecationWarning, deprecation_msg,
                               mixture.log_multivariate_normal_density,
                               X, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    # Imported locally so the block does not depend on a module-level import.
    import warnings

    def _user_warnings_on_fit(estimator, data):
        # Fit ``estimator`` while recording every warning and return only the
        # UserWarning entries.  Replaces ``pytest.warns(None)``, which is
        # deprecated since pytest 7 and no longer accepted by pytest 8.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            estimator.fit(data)
        return [each for each in record
                if issubclass(each.category, UserWarning)]

    X = iris.data

    # Test max_samples
    assert_raises(ValueError,
                  IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=2.0).fit, X)

    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)

    # note that assert_no_warnings does not apply since it enables a
    # PendingDeprecationWarning triggered by scipy.sparse's use of
    # np.matrix. See issue #11251.
    assert len(_user_warnings_on_fit(
        IsolationForest(max_samples='auto'), X)) == 0
    assert len(_user_warnings_on_fit(
        IsolationForest(max_samples=np.int64(2)), X)) == 0

    assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=1.5).fit, X)

    # test X_test n_features match X_train one:
    assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])
def test_warning_scaling_integers():
    """Scaling uint8 data must report the conversion to float64."""
    X = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)
    expected_msg = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    # The function and both estimators' ``fit`` must emit the same warning.
    for scaling_call in (scale, StandardScaler().fit, MinMaxScaler().fit):
        assert_warns_message(DataConversionWarning, expected_msg,
                             scaling_call, X)
def test_warning_scaling_integers():
    """Check warning when scaling integer data"""
    X = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)
    expected_msg = "assumes floating point values as input, got uint8"

    clean_warning_registry()
    # The function and both estimators' ``fit`` must emit the same warning.
    for scaling_call in (scale, StandardScaler().fit, MinMaxScaler().fit):
        assert_warns_message(UserWarning, expected_msg, scaling_call, X)
def test_lmvnpdf_full():
    """Compare the deprecated density function with 'full' covariances
    against the naive diagonal reference implementation."""
    n_features, n_components, n_samples = 2, 3, 10

    # ``rng`` is defined outside this function; the draws keep their original
    # order so its stream is consumed in the same sequence.
    means = rng.randint(10) * rng.rand(n_components, n_features)
    diag_covars = (rng.rand(n_components, n_features) + 1.0) ** 2
    samples = rng.randint(10) * rng.rand(n_samples, n_features)

    # Full covariance matrices whose only non-zero entries are the diagonal
    # ones, so the diagonal reference applies.
    full_covars = np.array([np.diag(row) for row in diag_covars])
    expected = _naive_lmvnpdf_diag(samples, means, diag_covars)

    deprecation_msg = ("The function"
                       " log_multivariate_normal_density is "
                       "deprecated in 0.18 and will be removed in 0.20.")
    actual = assert_warns_message(DeprecationWarning, deprecation_msg,
                                  mixture.log_multivariate_normal_density,
                                  samples, means, full_covars, 'full')
    assert_array_almost_equal(actual, expected)
def test_lmvnpdf_diag():
    """Check the vectorized lmvnpdf against a slow, naive implementation.

    The deprecated ``mixture.log_multivariate_normal_density`` is called with
    'diag' covariances and compared with ``_naive_lmvnpdf_diag``.
    """
    n_features, n_components, n_samples = 2, 3, 10

    # ``rng`` is defined outside this function; the draws keep their original
    # order so its stream is consumed in the same sequence.
    means = rng.randint(10) * rng.rand(n_components, n_features)
    diag_covars = (rng.rand(n_components, n_features) + 1.0) ** 2
    samples = rng.randint(10) * rng.rand(n_samples, n_features)

    expected = _naive_lmvnpdf_diag(samples, means, diag_covars)

    deprecation_msg = ("The function"
                       " log_multivariate_normal_density is "
                       "deprecated in 0.18 and will be removed in 0.20.")
    actual = assert_warns_message(DeprecationWarning, deprecation_msg,
                                  mixture.log_multivariate_normal_density,
                                  samples, means, diag_covars, 'diag')
    assert_array_almost_equal(actual, expected)
def test_gene_expression_filter_warning():
    """Check warnings and errors raised on invalid filtering arguments.

    Exercises ``scprep.filter.filter_gene_set_expression`` with a fractional
    percentile, conflicting or missing ``cutoff``/``percentile``, an invalid
    ``keep_cells`` and an unknown gene label, then checks that
    ``scprep.utils.select_cols`` warns when the mask selects no columns.
    """
    X = data.load_10X(sparse=True)
    genes = np.arange(10)
    no_genes = 'not_a_gene'

    # A percentile given as a fraction triggers a hint about the 0-100 scale.
    assert_warns_message(UserWarning,
                         "`percentile` expects values between 0 and 100."
                         "Got 0.9. Did you mean 90.0?",
                         scprep.filter.filter_gene_set_expression,
                         X, genes, percentile=0.90, keep_cells='below')
    assert_raise_message(
        ValueError,
        "Only one of `cutoff` and `percentile` should be given.",
        scprep.filter.filter_gene_set_expression,
        X, genes, percentile=0.90, cutoff=50)
    assert_raise_message(ValueError,
                         "Expected `keep_cells` in ['above', 'below']. "
                         "Got neither",
                         scprep.filter.filter_gene_set_expression,
                         X, genes, percentile=90.0, keep_cells='neither')
    assert_raise_message(
        ValueError,
        "One of either `cutoff` or `percentile` must be given.",
        scprep.filter.filter_gene_set_expression,
        X, genes, cutoff=None, percentile=None)
    assert_raise_message(KeyError,
                         "the label [not_a_gene] is not in the [columns]",
                         scprep.filter.filter_gene_set_expression,
                         X, no_genes, percentile=90.0, keep_cells='below')
    # The mask ``X.sum(axis=0) < 0`` is passed as the column selector.
    assert_warns_message(UserWarning,
                         "Selecting 0 columns",
                         scprep.utils.select_cols, X,
                         (X.sum(axis=0) < 0))
def test_deprecated_remove(self):
    """Each deprecated ``remove_*`` filter warns and names its replacement."""
    deprecated_calls = (
        (scprep.filter.remove_empty_genes,
         "`scprep.filter.remove_empty_genes` is deprecated. Use "
         "`scprep.filter.filter_empty_genes` instead."),
        (scprep.filter.remove_rare_genes,
         "`scprep.filter.remove_rare_genes` is deprecated. Use "
         "`scprep.filter.filter_rare_genes` instead."),
        (scprep.filter.remove_empty_cells,
         "`scprep.filter.remove_empty_cells` is deprecated. Use "
         "`scprep.filter.filter_empty_cells` instead."),
        (scprep.filter.remove_duplicates,
         "`scprep.filter.remove_duplicates` is deprecated. Use "
         "`scprep.filter.filter_duplicates` instead."),
    )
    for deprecated_func, expected_msg in deprecated_calls:
        assert_warns_message(DeprecationWarning, expected_msg,
                             deprecated_func, self.X_dense)
def test_no_feature_selected():
    """Strict selectors keep no feature and warn when transforming."""
    rng = np.random.RandomState(0)

    # Generate random uncorrelated data: a strict univariate test should
    # reject all the features
    X = rng.rand(40, 10)
    y = rng.randint(0, 4, size=40)

    unfitted = (
        SelectFwe(alpha=0.01),
        SelectFdr(alpha=0.01),
        SelectFpr(alpha=0.01),
        SelectPercentile(percentile=0),
        SelectKBest(k=0),
    )
    strict_selectors = [candidate.fit(X, y) for candidate in unfitted]

    for fitted in strict_selectors:
        assert_array_equal(fitted.get_support(), np.zeros(10))
        # assert_warns_message hands back the transformed array.
        reduced = assert_warns_message(UserWarning,
                                       'No features were selected',
                                       fitted.transform, X)
        assert_equal(reduced.shape, (40, 0))
def test_return_train_score_warn(self):
    """Check the FutureWarning raised when reading train scores by default.

    For every search estimator, ``cv_results_`` obtained with
    ``return_train_score='warn'`` must warn on access to a training-score
    key, return the same values as with ``return_train_score=True``, and stay
    silent for every other key.  With ``return_train_score=False`` the
    training-score keys must be absent.
    """
    from sklearn.utils.testing import ignore_warnings

    # Test that warnings are raised. Will be removed in 0.21
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    grid = {'C': [1, 2]}

    estimators = [
        GridSearchCV(LinearSVC(random_state=0), grid, iid=False, cv=3),
        RandomizedSearchCV(LinearSVC(random_state=0), grid, n_iter=2,
                           iid=False, cv=3),
    ]
    train_keys = [
        'split0_train_score', 'split1_train_score', 'split2_train_score',
        'mean_train_score', 'std_train_score',
    ]

    for estimator in estimators:
        # Results are collected and checked per estimator; a dict shared
        # across the loop would only ever hold the last estimator's results.
        result = {}
        for val in [True, False, 'warn']:
            estimator.set_params(return_train_score=val)
            fit_func = ignore_warnings(estimator.fit,
                                       category=ConvergenceWarning)
            result[val] = assert_no_warnings(fit_func, X, y).cv_results_

        for key in train_keys:
            msg = ('You are accessing a training score ({!r}), '
                   'which will not be available by default '
                   'any more in 0.21. If you need training scores, '
                   'please set return_train_score=True').format(key)
            train_score = assert_warns_message(FutureWarning, msg,
                                               result['warn'].get, key)
            assert np.allclose(train_score, result[True][key])
            assert key not in result[False]

        for key in result['warn']:
            if key not in train_keys:
                assert_no_warnings(result['warn'].get, key)
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    """fetch_openml warns when a target column is flagged as row identifier
    or as ignored, for single and multiple target columns."""
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)

    # (expected message, target_column) pairs, in the original order.
    cases = [
        # single column test
        (expected_row_id_msg.format('MouseID'), 'MouseID'),
        (expected_ignore_msg.format('Genotype'), 'Genotype'),
        # multi column test
        (expected_row_id_msg.format('MouseID'), ['MouseID', 'class']),
        (expected_ignore_msg.format('Genotype'), ['Genotype', 'class']),
    ]
    for expected_msg, target in cases:
        assert_warns_message(UserWarning, expected_msg,
                             fetch_openml, data_id=data_id,
                             target_column=target, cache=False)
def test_unicode_decode_error():
    """Analyzers configured for ascii must fail on UTF-8 encoded bytes."""
    # decode_error default to strict, so this should fail
    # First, encode (as bytes) a unicode string.
    text = "J'ai mang\xe9 du kangourou ce midi, c'\xe9tait pas tr\xeas bon."
    text_bytes = text.encode('utf-8')

    # Then let the Analyzer try to decode it as ascii. It should fail,
    # because we have given it an incorrect encoding.
    word_analyzer = CountVectorizer(ngram_range=(1, 2),
                                    encoding='ascii').build_analyzer()
    assert_raises(UnicodeDecodeError, word_analyzer, text_bytes)

    char_options = dict(analyzer='char', ngram_range=(3, 6))
    char_analyzer = CountVectorizer(encoding='ascii',
                                    **char_options).build_analyzer()
    assert_raises(UnicodeDecodeError, char_analyzer, text_bytes)

    # Check the old interface: the ``charset`` keyword must warn and the
    # resulting analyzer must fail the same way.
    in_warning_message = 'charset'
    legacy_vectorizer = assert_warns_message(DeprecationWarning,
                                             in_warning_message,
                                             CountVectorizer,
                                             charset='ascii',
                                             **char_options)
    legacy_analyzer = legacy_vectorizer.build_analyzer()
    assert_raises(UnicodeDecodeError, legacy_analyzer, text_bytes)
def test_deprecation():
    """IsolationForest emits its FutureWarning and DeprecationWarning
    messages on ``fit`` and on access to ``threshold_``."""
    X = [[0.0], [1.0]]

    clf = IsolationForest()
    fit_messages = (
        'default contamination parameter 0.1 will change '
        'in version 0.22 to "auto"',
        'behaviour="old" is deprecated and will be removed '
        'in version 0.22',
    )
    # Each message is checked with its own call to ``fit``.
    for expected_msg in fit_messages:
        assert_warns_message(FutureWarning, expected_msg, clf.fit, X)

    fitted = IsolationForest().fit(X)
    assert_warns_message(DeprecationWarning,
                         "threshold_ attribute is deprecated in 0.20 and will"
                         " be removed in 0.22.",
                         getattr, fitted, "threshold_")
def test_tfidf_no_smoothing():
    """Check TfidfTransformer with ``smooth_idf=False``.

    Covers non-negativity and l2 normalization on regular input, then the
    'divide by zero' RuntimeWarning raised when a feature is zero everywhere.

    Raises
    ------
    SkipTest
        If numpy does not report a division by zero as a warning.
    """
    X = [[1, 1, 1],
         [1, 1, 0],
         [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')
    tfidf = tr.fit_transform(X).toarray()
    assert_true((tfidf >= 0).all())

    # check normalization
    assert_array_almost_equal((tfidf ** 2).sum(axis=1), [1., 1., 1.])

    # the lack of smoothing makes IDF fragile in the presence of a feature
    # with only zeros
    X = [[1, 1, 0],
         [1, 1, 0],
         [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')

    # Probe numpy first: "always" keeps an already-registered warning from
    # being suppressed, and skipping before the assertion avoids a failure
    # when the warning cannot be produced at all.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        1. / np.array([0.])
        numpy_provides_div0_warning = len(w) == 1
    if not numpy_provides_div0_warning:
        raise SkipTest("Numpy does not provide div 0 warnings.")

    in_warning_message = 'divide by zero'
    # The result is still converted with ``toarray`` as before; its values
    # are not inspected.
    assert_warns_message(RuntimeWarning, in_warning_message,
                         tr.fit_transform, X).toarray()
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    """Test radius-based neighbors regression.

    Predictions for slightly perturbed training points must stay within
    ``radius / 2`` of their targets, and a query with no neighbor inside the
    radius must warn and predict NaN.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]
    queries = X[:n_test_pts]
    weight_options = ['uniform', 'distance', _weight_func]

    for algorithm in ALGORITHMS:
        for weights in weight_options:
            regressor = neighbors.RadiusNeighborsRegressor(
                radius=radius, weights=weights, algorithm=algorithm)
            regressor.fit(X, y)
            # A fresh perturbation is drawn for every combination.
            jitter = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = regressor.predict(queries + jitter)
            assert_true(np.all(np.abs(y_pred - y_target) < radius / 2))

    # test that nan is returned when no nearby observations
    isolated_query = np.ones((1, n_features)) * -1
    empty_warning_msg = ("One or more samples have no neighbors "
                         "within specified radius; predicting NaN.")
    for weights in ['uniform', 'distance']:
        regressor = neighbors.RadiusNeighborsRegressor(
            radius=radius, weights=weights, algorithm='auto')
        regressor.fit(X, y)
        pred = assert_warns_message(UserWarning, empty_warning_msg,
                                    regressor.predict, isolated_query)
        assert_true(np.all(np.isnan(pred)))
def test_kfold_valueerrors():
    """Invalid KFold / StratifiedKFold arguments raise or warn as expected."""
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, cval.KFold, 3, 4)

    # Check that a warning is raised if the least populated class has too few
    # members.
    labels_with_small_class = [3, 3, -1, -1, 3]
    cv = assert_warns_message(Warning, "The least populated class",
                              cval.StratifiedKFold,
                              labels_with_small_class, 3)

    # Check that despite the warning the folds are still computed even
    # though all the classes are not necessarily represented on each
    # side of the split at each split
    check_cv_coverage(cv, expected_n_iter=3,
                      n_samples=len(labels_with_small_class))

    # Check that errors are raised if all n_labels for individual
    # classes are less than n_folds.
    y = [3, 3, -1, -1, 2]
    assert_raises(ValueError, cval.StratifiedKFold, y, 3)

    # Error when number of folds is <= 1
    for too_few_folds in (0, 1):
        assert_raises(ValueError, cval.KFold, 2, too_few_folds)
    error_string = ("k-fold cross validation requires at least one"
                    " train / test split")
    for too_few_folds in (0, 1):
        assert_raise_message(ValueError, error_string,
                             cval.StratifiedKFold, y, too_few_folds)

    # When n is not integer:
    assert_raises(ValueError, cval.KFold, 2.5, 2)

    # When n_folds is not integer:
    assert_raises(ValueError, cval.KFold, 5, 1.5)
    assert_raises(ValueError, cval.StratifiedKFold, y, 1.5)
def test_generate_colorbar_mappable(self):
    """generate_colorbar accepts a mappable and warns about every option
    that cannot be combined with it."""
    im = plt.imshow([np.arange(10), np.arange(10)])
    make_colorbar = scprep.plot.tools.generate_colorbar

    # The mappable on its own is accepted.
    make_colorbar(mappable=im)

    # (expected message, conflicting keyword arguments)
    conflicts = (
        ("Cannot set `vmin` or `vmax` when `mappable` is given.",
         dict(vmin=10, vmax=20)),
        ("Cannot set `cmap` when `mappable` is given.",
         dict(cmap='inferno')),
        ("Cannot set `scale` when `mappable` is given.",
         dict(scale='log')),
    )
    for expected_msg, extra_kwargs in conflicts:
        assert_warns_message(UserWarning, expected_msg,
                             make_colorbar, mappable=im, **extra_kwargs)
def test_scatter_invalid_legend(self):
    """scatter2d warns when a legend is requested for a colour array or for
    a constant ``c``."""
    # One colour name per row of the data.
    color_array = np.random.choice(['red', 'blue'], self.X_pca.shape[0],
                                   replace=True)
    assert_warns_message(
        UserWarning,
        "`c` is a color array and cannot be used to create a "
        "legend. To interpret these values as labels instead, "
        "provide a `cmap` dictionary with label-color pairs.",
        scprep.plot.scatter2d, self.X_pca, legend=True, c=color_array)

    # (constant value for ``c``, expected message)
    constant_cases = (
        ('red', "Cannot create a legend with constant `c=red`"),
        (None, "Cannot create a legend with constant `c=None`"),
    )
    for constant_color, expected_msg in constant_cases:
        assert_warns_message(UserWarning, expected_msg,
                             scprep.plot.scatter2d, self.X_pca,
                             legend=True, c=constant_color)