def test_mcd_increasing_det_warning():
    # A warning must be raised when the determinants observed during the
    # c_step increase. In theory the sequence of determinants is decreasing;
    # an increase is likely due to ill-conditioned covariance matrices that
    # result in poor precision matrices.
    samples = [
        [5.1, 3.5, 1.4, 0.2],
        [4.9, 3.0, 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5.0, 3.6, 1.4, 0.2],
        [4.6, 3.4, 1.4, 0.3],
        [5.0, 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3.0, 1.4, 0.1],
        [4.3, 3.0, 1.1, 0.1],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [4.6, 3.6, 1.0, 0.2],
        [5.0, 3.0, 1.6, 0.2],
        [5.2, 3.5, 1.5, 0.2],
    ]

    estimator = MinCovDet(random_state=1)
    expected_warning = "Determinant has increased"
    assert_warns_message(RuntimeWarning, expected_warning,
                         estimator.fit, samples)
def test_affinity_propagation_equal_mutual_similarities():
    # Two samples only: check the "mutually equal" warning and the returned
    # exemplars / labels for several preference settings.
    points = np.array([[-1, 1], [1, -1]])
    similarities = -euclidean_distances(points, squared=True)

    # preference > similarity: expect every sample to become an exemplar
    exemplars, assignments = assert_warns_message(
        UserWarning, "mutually equal",
        affinity_propagation, similarities, preference=0)
    assert_array_equal([0, 1], exemplars)
    assert_array_equal([0, 1], assignments)

    # preference < similarity: expect one cluster, with arbitrary (first)
    # sample as exemplar
    exemplars, assignments = assert_warns_message(
        UserWarning, "mutually equal",
        affinity_propagation, similarities, preference=-10)
    assert_array_equal([0], exemplars)
    assert_array_equal([0, 0], assignments)

    # different preferences: no warning, and expect one cluster with the
    # highest-preference sample as exemplar
    exemplars, assignments = assert_no_warnings(
        affinity_propagation, similarities, preference=[-20, -10])
    assert_array_equal([1], exemplars)
    assert_array_equal([0, 0], assignments)
def test_fetch_openml_australian(monkeypatch, gzip_response):
    # Sparse dataset: Australian is the only sparse dataset that is
    # reasonably small. As it is inactive, we need to catch the warning;
    # due to the mocking framework, it is not deactivated in our tests.
    data_id = 292
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)

    assert_warns_message(
        UserWarning,
        "Version 1 of dataset Australian is inactive,",
        _fetch_dataset_from_openml,
        data_id=data_id,
        data_name='Australian',
        data_version=1,
        target_column='Y',
        # Not all original instances included for space reasons
        expected_observations=85,
        expected_features=14,
        expected_missing=0,
        expect_sparse=True,
        expected_data_dtype=np.float64,
        expected_target_dtype=object,
        compare_default_target=False,  # numpy specific check
    )
def test_fetch_openml_iris(monkeypatch, gzip_response):
    # Classification dataset with numeric only columns.
    data_id = 61
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)

    expected_warning = (
        "Multiple active versions of the dataset matching the name"
        " iris exist. Versions may be fundamentally different, "
        "returning version 1."
    )
    assert_warns_message(
        UserWarning,
        expected_warning,
        _fetch_dataset_from_openml,
        data_id=data_id,
        data_name='iris',
        data_version=1,
        target_column='class',
        expected_observations=150,
        expected_features=4,
        expected_missing=0,
        expect_sparse=False,
        expected_data_dtype=np.float64,
        expected_target_dtype=object,
        compare_default_target=True,
    )
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    random_gen = check_random_state(0)
    n_samples = 10
    X = random_gen.randn(n_samples, n_features)
    # n_classes labels are created by repeating range(n_classes) and
    # truncating the result to n_samples entries
    repeats = n_samples // n_classes + 1
    y = np.tile(range(n_classes), repeats)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    # n_components <= min(n_classes - 1, n_features): no warning
    for n_components in (max_components - 1, None, max_components):
        model = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(model.fit, X, y)

    # Both expected messages are independent of the tested n_components.
    changed_msg = ("n_components cannot be larger than min(n_features, "
                   "n_classes - 1). Using min(n_features, "
                   "n_classes - 1) = min(%d, %d - 1) = %d components."
                   % (n_features, n_classes, max_components))
    future_msg = ("In version 0.23, setting n_components > min("
                  "n_features, n_classes - 1) will raise a "
                  "ValueError. You should set n_components to None"
                  " (default), or a value smaller or equal to "
                  "min(n_features, n_classes - 1).")

    # n_components > min(n_classes - 1, n_features): raise warning.
    # We test one unit higher than max_components, and then something
    # larger than both n_features and n_classes - 1 to ensure the test
    # works for any value of n_component
    too_large = (max_components + 1, max(n_features, n_classes - 1) + 1)
    for n_components in too_large:
        model = LinearDiscriminantAnalysis(n_components=n_components)
        assert_warns_message(ChangedBehaviorWarning, changed_msg,
                             model.fit, X, y)
        assert_warns_message(FutureWarning, future_msg, model.fit, X, y)
def test_redundant_bins(strategy, expected_bin_edges):
    # Asking for 3 bins on a feature with only two distinct values must
    # warn about removed bins and leave the expected edges.
    data = [[0], [0], [0], [0], [3], [3]]
    discretizer = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins."
    )
    assert_warns_message(UserWarning, expected_warning,
                         discretizer.fit, data)
    assert_array_almost_equal(discretizer.bin_edges_[0], expected_bin_edges)
def test_overrided_gram_matrix():
    # Fitting an ElasticNet with a user-supplied Gram matrix must warn that
    # the Gram matrix is recomputed.
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    precomputed_gram = X.T.dot(X)
    model = ElasticNet(selection='cyclic', tol=1e-8,
                       precompute=precomputed_gram)
    expected_warning = ("Gram matrix was provided but X was centered"
                        " to fit intercept, "
                        "or X was normalized : recomputing Gram matrix.")
    assert_warns_message(UserWarning, expected_warning, model.fit, X, y)
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    # Fetching dataset 1 through the patched web functions must warn that
    # OpenML registered a problem with it.
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    expected_warning = (
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:"
    )
    assert_warns_message(UserWarning, expected_warning,
                         fetch_openml, data_id=data_id, cache=False)
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    # Fetching dataset 3 through the patched web functions must relay the
    # warning OpenML raised on it.
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    expected_warning = (
        "OpenML raised a warning on the dataset. It might be unusable. "
        "Warning:"
    )
    assert_warns_message(UserWarning, expected_warning,
                         fetch_openml, data_id=data_id, cache=False)
def test_pickle_version_warning_is_issued_upon_different_version():
    # Unpickling an estimator pickled as TreeBadVersion must warn with the
    # formatted version-mismatch message.
    dataset = datasets.load_iris()
    fitted_tree = TreeBadVersion().fit(dataset.data, dataset.target)
    pickled_tree = pickle.dumps(fitted_tree)

    expected_warning = pickle_error_message.format(
        estimator="TreeBadVersion",
        old_version="something",
        current_version=sklearn_lib.__version__,
    )
    assert_warns_message(UserWarning, expected_warning,
                         pickle.loads, pickled_tree)
def test_n_neighbors_attribute():
    # Requesting more neighbors than available must warn and set
    # n_neighbors_ to n_samples - 1.
    X = iris.data
    expected_n_neighbors = X.shape[0] - 1

    fitted = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
    assert fitted.n_neighbors_ == expected_n_neighbors

    unfitted = neighbors.LocalOutlierFactor(n_neighbors=500)
    assert_warns_message(UserWarning,
                         "n_neighbors will be set to (n_samples - 1)",
                         unfitted.fit, X)
    assert unfitted.n_neighbors_ == expected_n_neighbors
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64,
                         warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)

    # No warning expected when no conversion happens. The warnings are
    # recorded with the standard library instead of pytest.warns(None),
    # which is deprecated in pytest 7 and removed in pytest 8.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        warnings.simplefilter("ignore", FutureWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    for target_dtype in (np.float64, 'numeric', object):
        assert_warns(DataConversionWarning, check_array, df_mixed,
                     dtype=target_dtype, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)

    # A frame cast to a single integer dtype must not trigger the warning.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        warnings.simplefilter("ignore", FutureWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0
def test_percentile_numeric_stability():
    # Quantile binning with 10 requested bins on three samples: check the
    # warning about removed bins, the remaining edges and the transform.
    X = np.array([0.05, 0.05, 0.95]).reshape(-1, 1)
    expected_edges = np.array([0.05, 0.23, 0.41, 0.59, 0.77, 0.95])
    expected_codes = np.array([0, 0, 4]).reshape(-1, 1)

    discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal',
                                   strategy='quantile')
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins."
    )
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)

    assert_array_almost_equal(discretizer.bin_edges_[0], expected_edges)
    assert_array_almost_equal(discretizer.transform(X), expected_codes)
def test_max_samples_attribute():
    # Check max_samples_ for the default, an integer larger than n_samples
    # (which must warn), and a float max_samples.
    X = iris.data
    n_samples = X.shape[0]

    default_forest = IsolationForest().fit(X)
    assert default_forest.max_samples_ == n_samples

    oversized_forest = IsolationForest(max_samples=500)
    assert_warns_message(
        UserWarning,
        "max_samples will be set to n_samples for estimation",
        oversized_forest.fit, X)
    assert oversized_forest.max_samples_ == n_samples

    fractional_forest = IsolationForest(max_samples=0.4).fit(X)
    assert fractional_forest.max_samples_ == 0.4 * n_samples
def test_fetch_openml_inactive(monkeypatch, gzip_response):
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    inactive_warning = "Version 1 of dataset glass2 is inactive,"

    # fetch inactive dataset by id
    by_id = assert_warns_message(
        UserWarning, inactive_warning,
        fetch_openml, data_id=data_id, cache=False)
    assert by_id.data.shape == (163, 9)

    # fetch inactive dataset by name and version
    by_name_and_version = assert_warns_message(
        UserWarning, inactive_warning,
        fetch_openml, data_id=None, name="glass2", version=1, cache=False)
    assert int(by_name_and_version.details['id']) == data_id
def test_affinities():
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    # nearest neighbors affinity
    clustering = SpectralClustering(n_clusters=2,
                                    affinity='nearest_neighbors',
                                    random_state=0)
    assert_warns_message(UserWarning, 'not fully connected',
                         clustering.fit, X)
    assert adjusted_rand_score(y, clustering.labels_) == 1

    # default affinity with an explicit gamma
    clustering = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    labels = clustering.fit(X).labels_
    assert adjusted_rand_score(y, labels) == 1

    X = check_random_state(10).rand(10, 5) * 10

    # every available kernel name used as affinity
    for kernel_name in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kernel_name == 'additive_chi2':
            continue
        clustering = SpectralClustering(n_clusters=2, affinity=kernel_name,
                                        random_state=0)
        labels = clustering.fit(X).labels_
        assert (X.shape[0], ) == labels.shape

    # constant callable affinity
    clustering = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
                                    random_state=0)
    labels = clustering.fit(X).labels_
    assert (X.shape[0], ) == labels.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    clustering = SpectralClustering(n_clusters=2, affinity=histogram,
                                    random_state=0)
    labels = clustering.fit(X).labels_
    assert (X.shape[0], ) == labels.shape

    # raise error on unknown affinity
    clustering = SpectralClustering(n_clusters=2, affinity='<unknown>')
    with pytest.raises(ValueError):
        clustering.fit(X)
def test_transform_target_regressor_invertible():
    # sqrt / log are not inverse of each other: fitting must warn when
    # check_inverse=True and must stay silent when check_inverse=False.
    X, y = friedman

    checked = TransformedTargetRegressor(regressor=LinearRegression(),
                                         func=np.sqrt, inverse_func=np.log,
                                         check_inverse=True)
    expected_warning = ("The provided functions or transformer"
                        " are not strictly inverse of each other.")
    assert_warns_message(UserWarning, expected_warning, checked.fit, X, y)

    unchecked = TransformedTargetRegressor(regressor=LinearRegression(),
                                           func=np.sqrt, inverse_func=np.log)
    unchecked.set_params(check_inverse=False)
    assert_no_warnings(unchecked.fit, X, y)
def test_multi_task_lasso_and_enet():
    # Two identical tasks: both multi-task models must reach a small positive
    # dual gap and learn the same coefficients for each task.
    X, y, _, _ = build_dataset()
    Y = np.c_[y, y]

    lasso = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < lasso.dual_gap_ < 1e-5
    assert_array_almost_equal(lasso.coef_[0], lasso.coef_[1])

    enet = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < enet.dual_gap_ < 1e-5
    assert_array_almost_equal(enet.coef_[0], enet.coef_[1])

    # a single iteration must trigger the convergence warning
    single_iter_enet = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge',
                         single_iter_enet.fit, X, Y)
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    dataset = datasets.load_iris()
    # TreeNoVersion has no getstate, like pre-0.18
    fitted_tree = TreeNoVersion().fit(dataset.data, dataset.target)

    pickled_tree = pickle.dumps(fitted_tree)
    assert b"version" not in pickled_tree

    expected_warning = pickle_error_message.format(
        estimator="TreeNoVersion",
        old_version="pre-0.18",
        current_version=sklearn_lib.__version__,
    )
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, expected_warning,
                         pickle.loads, pickled_tree)
def test_linearsvx_loss_penalty_deprecations():
    # The legacy 'l1' / 'l2' loss names of LinearSVC and LinearSVR must
    # raise a FutureWarning pointing at the replacement loss name.
    X, y = [[0.0], [1.0]], [0, 1]

    template = ("loss='%s' has been deprecated in favor of "
                "loss='%s' as of 0.16. Backward compatibility"
                " for the %s will be removed in %s")

    # (estimator class, deprecated loss, replacement loss), in test order
    cases = (
        (svm.LinearSVC, "l1", "hinge"),
        (svm.LinearSVC, "l2", "squared_hinge"),
        (svm.LinearSVR, "l1", "epsilon_insensitive"),
        (svm.LinearSVR, "l2", "squared_epsilon_insensitive"),
    )
    for estimator_cls, old_loss, new_loss in cases:
        expected_warning = template % (old_loss, new_loss,
                                       "loss='%s'" % old_loss, "0.23")
        assert_warns_message(FutureWarning, expected_warning,
                             estimator_cls(loss=old_loss).fit, X, y)
def test_same_min_max(strategy):
    # A constant feature must trigger a warning, get a single bin and be
    # transformed to zeros.
    # NOTE(review): this filter change is global and is not restored here.
    warnings.simplefilter("always")

    X = np.array([[1, -2],
                  [1, -1],
                  [1, 0],
                  [1, 1]])
    discretizer = KBinsDiscretizer(strategy=strategy, n_bins=3,
                                   encode='ordinal')
    expected_warning = ("Feature 0 is constant and will be replaced "
                        "with 0.")
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)
    assert discretizer.n_bins_[0] == 1

    # replace the feature with zeros
    transformed = discretizer.transform(X)
    assert_array_equal(transformed[:, 0], np.zeros(X.shape[0]))
def test_multilabel_binarizer_unknown_class():
    # Transforming labels unseen during fit must warn and ignore them, both
    # with inferred classes and with explicitly given classes.
    y = [[1, 2]]
    unseen_input = [[4, 1], [2, 0]]
    expected_warning = 'unknown class(es) [0, 4] will be ignored'

    # classes inferred from y
    binarizer = MultiLabelBinarizer()
    expected = np.array([[1, 0], [0, 1]])
    result = assert_warns_message(UserWarning, expected_warning,
                                  binarizer.fit(y).transform, unseen_input)
    assert_array_equal(result, expected)

    # classes given explicitly
    binarizer = MultiLabelBinarizer(classes=[1, 2, 3])
    expected = np.array([[1, 0, 0], [0, 1, 0]])
    result = assert_warns_message(UserWarning, expected_warning,
                                  binarizer.fit(y).transform, unseen_input)
    assert_array_equal(result, expected)
def test_tfidf_no_smoothing():
    # Without IDF smoothing, the transform must still give non-negative,
    # l2-normalized rows, and must emit a division-by-zero warning when a
    # feature is zero in every sample.
    X = [[1, 1, 1],
         [1, 1, 0],
         [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')
    tfidf = tr.fit_transform(X).toarray()
    assert (tfidf >= 0).all()

    # check normalization
    assert_array_almost_equal((tfidf ** 2).sum(axis=1), [1., 1., 1.])

    # the lack of smoothing make IDF fragile in the presence of feature with
    # only zeros
    X = [[1, 1, 0],
         [1, 1, 0],
         [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')

    clean_warning_registry()
    with warnings.catch_warnings(record=True) as w:
        1. / np.array([0.])
        numpy_provides_div0_warning = len(w) == 1

    # Skip before asserting: if numpy emits no division-by-zero warning the
    # assertion below cannot pass, so checking afterwards would turn the
    # intended skip into a failure.
    if not numpy_provides_div0_warning:
        raise SkipTest("Numpy does not provide div 0 warnings.")

    in_warning_message = 'divide by zero'
    tfidf = assert_warns_message(RuntimeWarning, in_warning_message,
                                 tr.fit_transform, X).toarray()
def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrix
    random_gen = np.random.RandomState(42)
    X = make_low_rank_matrix(50, 20, effective_rank=10,
                             random_state=random_gen)
    n_components = 5
    sparse_formats = (sparse.lil_matrix, sparse.dok_matrix)
    for cls in sparse_formats:
        # X is rebound on purpose: each format is built from the previous X
        X = cls(X)
        expected_warning = ("Calculating SVD of a {} is expensive. "
                            "csr_matrix is more efficient.".format(
                                cls.__name__))
        assert_warns_message(sparse.SparseEfficiencyWarning,
                             expected_warning,
                             randomized_svd, X, n_components, n_iter=1,
                             power_iteration_normalizer='none')
def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=43)

    bagging = BaggingClassifier(n_estimators=5, warm_start=True,
                                random_state=83)
    bagging.fit(X_train, y_train)
    predictions_before = bagging.predict(X_test)

    # modify X to nonsense values, this should not change anything
    X_train += 1.

    expected_warning = ("Warm-start fitting without increasing "
                        "n_estimators does not")
    assert_warns_message(UserWarning, expected_warning,
                         bagging.fit, X_train, y_train)
    assert_array_equal(predictions_before, bagging.predict(X_test))
def test_deprecated():
    # Each mock must emit a FutureWarning carrying its message when called.
    assert_warns_message(FutureWarning, 'qwerty', MockClass1)

    deprecated_method = MockClass2().method
    assert_warns_message(FutureWarning, 'mockclass2_method',
                         deprecated_method)

    assert_warns_message(FutureWarning, 'deprecated', MockClass3)

    # the wrapped function's return value must be passed through
    returned = assert_warns_message(FutureWarning, 'deprecated',
                                    mock_function)
    assert returned == 10
def test_check_ci_warn():
    xs = [0, 1, 2, 3, 4, 5]
    ys = [0, -1, 2, -3, 4, -5]

    # Check that we got increasing=False and CI interval warning
    increasing = assert_warns_message(UserWarning, "interval",
                                      check_increasing, xs, ys)
    assert not increasing
def test_gaussian_mixture_fit_convergence_warning():
    # With a single EM iteration, fit must emit a ConvergenceWarning for
    # every covariance type.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=1)
    n_components = rand_data.n_components
    max_iter = 1

    # The %d placeholder is filled with max_iter, as in the original check.
    expected_warning = ('Initialization %d did not converge. '
                        'Try different init parameters, '
                        'or increase max_iter, tol '
                        'or check for degenerate data.' % max_iter)

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        mixture = GaussianMixture(n_components=n_components, n_init=1,
                                  max_iter=max_iter, reg_covar=0,
                                  random_state=rng,
                                  covariance_type=covar_type)
        assert_warns_message(ConvergenceWarning, expected_warning,
                             mixture.fit, X)
def test_regressormixin_score_multioutput():
    from sklearn_lib.linear_model import LinearRegression

    X = [[1], [2], [3]]

    # no warnings when y_type is continuous
    y_single = [1, 2, 3]
    single_output_reg = LinearRegression().fit(X, y_single)
    assert_no_warnings(single_output_reg.score, X, y_single)

    # warn when y_type is continuous-multioutput
    y_multi = [[1, 2], [2, 3], [3, 4]]
    multi_output_reg = LinearRegression().fit(X, y_multi)
    expected_warning = ("The default value of multioutput (not exposed in "
                        "score method) will change from 'variance_weighted' "
                        "to 'uniform_average' in 0.23 to keep consistent "
                        "with 'metrics.r2_score'. To specify the default "
                        "value manually and avoid the warning, please "
                        "either call 'metrics.r2_score' directly or make a "
                        "custom scorer with 'metrics.make_scorer' (the "
                        "built-in scorer 'r2' uses "
                        "multioutput='uniform_average').")
    assert_warns_message(FutureWarning, expected_warning,
                         multi_output_reg.score, X, y_multi)
def test_uniform_strategy_sparse_target_warning():
    # Fitting the uniform strategy on a sparse target must warn; predictions
    # must then be roughly uniform over the three labels of each output.
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[2, 1],
                                [2, 2],
                                [1, 4],
                                [4, 2],
                                [1, 1]]))

    dummy = DummyClassifier(strategy="uniform", random_state=0)
    assert_warns_message(UserWarning,
                         "the uniform strategy would not save memory",
                         dummy.fit, X, y)

    X = [[0]] * 500
    y_pred = dummy.predict(X)
    n_predictions = float(len(X))

    for output_idx in range(y.shape[1]):
        frequencies = np.bincount(y_pred[:, output_idx]) / n_predictions
        for label in (1, 2, 4):
            assert_almost_equal(frequencies[label], 1 / 3, decimal=1)