def test_redundant_bins(strategy, expected_bin_edges): X = [[0], [0], [0], [0], [3], [3]] kbd = KBinsDiscretizer(n_bins=3, strategy=strategy) msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 " "are removed. Consider decreasing the number of bins.") assert_warns_message(UserWarning, msg, kbd.fit, X) assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges)
def test_fetch_openml_australian(monkeypatch, gzip_response): # sparse dataset # Australian is the only sparse dataset that is reasonably small # as it is inactive, we need to catch the warning. Due to mocking # framework, it is not deactivated in our tests data_id = 292 data_name = 'Australian' data_version = 1 target_column = 'Y' # Not all original instances included for space reasons expected_observations = 85 expected_features = 14 expected_missing = 0 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "Version 1 of dataset Australian is inactive,", _fetch_dataset_from_openml, **{ 'data_id': data_id, 'data_name': data_name, 'data_version': data_version, 'target_column': target_column, 'expected_observations': expected_observations, 'expected_features': expected_features, 'expected_missing': expected_missing, 'expect_sparse': True, 'expected_data_dtype': np.float64, 'expected_target_dtype': object, 'compare_default_target': False } # numpy specific check )
def test_fetch_openml_iris(monkeypatch, gzip_response): # classification dataset with numeric only columns data_id = 61 data_name = 'iris' data_version = 1 target_column = 'class' expected_observations = 150 expected_features = 4 expected_missing = 0 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "Multiple active versions of the dataset matching the name" " iris exist. Versions may be fundamentally different, " "returning version 1.", _fetch_dataset_from_openml, **{ 'data_id': data_id, 'data_name': data_name, 'data_version': data_version, 'target_column': target_column, 'expected_observations': expected_observations, 'expected_features': expected_features, 'expected_missing': expected_missing, 'expect_sparse': False, 'expected_data_dtype': np.float64, 'expected_target_dtype': object, 'compare_default_target': True })
def test_affinity_propagation_equal_mutual_similarities(): X = np.array([[-1, 1], [1, -1]]) S = -euclidean_distances(X, squared=True) # setting preference > similarity cluster_center_indices, labels = assert_warns_message( UserWarning, "mutually equal", affinity_propagation, S, preference=0) # expect every sample to become an exemplar assert_array_equal([0, 1], cluster_center_indices) assert_array_equal([0, 1], labels) # setting preference < similarity cluster_center_indices, labels = assert_warns_message( UserWarning, "mutually equal", affinity_propagation, S, preference=-10) # expect one cluster, with arbitrary (first) sample as exemplar assert_array_equal([0], cluster_center_indices) assert_array_equal([0, 0], labels) # setting different preferences cluster_center_indices, labels = assert_no_warnings( affinity_propagation, S, preference=[-20, -10]) # expect one cluster, with highest-preference sample as exemplar assert_array_equal([1], cluster_center_indices) assert_array_equal([0, 0], labels)
def test_lda_dimension_warning(n_classes, n_features): # FIXME: Future warning to be removed in 0.23 rng = check_random_state(0) n_samples = 10 X = rng.randn(n_samples, n_features) # we create n_classes labels by repeating and truncating a # range(n_classes) until n_samples y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples] max_components = min(n_features, n_classes - 1) for n_components in [max_components - 1, None, max_components]: # if n_components <= min(n_classes - 1, n_features), no warning lda = LinearDiscriminantAnalysis(n_components=n_components) assert_no_warnings(lda.fit, X, y) for n_components in [ max_components + 1, max(n_features, n_classes - 1) + 1 ]: # if n_components > min(n_classes - 1, n_features), raise warning # We test one unit higher than max_components, and then something # larger than both n_features and n_classes - 1 to ensure the test # works for any value of n_component lda = LinearDiscriminantAnalysis(n_components=n_components) msg = ("n_components cannot be larger than min(n_features, " "n_classes - 1). Using min(n_features, " "n_classes - 1) = min(%d, %d - 1) = %d components." % (n_features, n_classes, max_components)) assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y) future_msg = ("In version 0.23, setting n_components > min(" "n_features, n_classes - 1) will raise a " "ValueError. You should set n_components to None" " (default), or a value smaller or equal to " "min(n_features, n_classes - 1).") assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
def test_deprecated_calinski_harabaz_score(): depr_message = ("Function 'calinski_harabaz_score' has been renamed " "to 'calinski_harabasz_score' " "and will be removed in version 0.23.") assert_warns_message(DeprecationWarning, depr_message, calinski_harabaz_score, np.ones( (10, 2)), [0] * 5 + [1] * 5)
def test_pickle_version_warning_is_issued_upon_different_version(): iris = datasets.load_iris() tree = TreeBadVersion().fit(iris.data, iris.target) tree_pickle_other = pickle.dumps(tree) message = pickle_error_message.format(estimator="TreeBadVersion", old_version="something", current_version=mrex.__version__) assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)
def test_overrided_gram_matrix(): X, y, _, _ = build_dataset(n_samples=20, n_features=10) Gram = X.T.dot(X) clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram) assert_warns_message( UserWarning, "Gram matrix was provided but X was centered" " to fit intercept, " "or X was normalized : recomputing Gram matrix.", clf.fit, X, y)
def test_percentile_numeric_stability(): X = np.array([0.05, 0.05, 0.95]).reshape(-1, 1) bin_edges = np.array([0.05, 0.23, 0.41, 0.59, 0.77, 0.95]) Xt = np.array([0, 0, 4]).reshape(-1, 1) kbd = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile') msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 " "are removed. Consider decreasing the number of bins.") assert_warns_message(UserWarning, msg, kbd.fit, X) assert_array_almost_equal(kbd.bin_edges_[0], bin_edges) assert_array_almost_equal(kbd.transform(X), Xt)
def test_dataset_with_openml_warning(monkeypatch, gzip_response): data_id = 3 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "OpenML raised a warning on the dataset. It might be unusable. " "Warning:", fetch_openml, data_id=data_id, cache=False)
def test_n_neighbors_attribute(): X = iris.data clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X) assert clf.n_neighbors_ == X.shape[0] - 1 clf = neighbors.LocalOutlierFactor(n_neighbors=500) assert_warns_message(UserWarning, "n_neighbors will be set to (n_samples - 1)", clf.fit, X) assert clf.n_neighbors_ == X.shape[0] - 1
def test_dataset_with_openml_error(monkeypatch, gzip_response): data_id = 1 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "OpenML registered a problem with the dataset. It might be unusable. " "Error:", fetch_openml, data_id=data_id, cache=False)
def test_check_dataframe_warns_on_dtype(): # Check that warn_on_dtype also works for DataFrames. # https://github.com/scikit-learn/scikit-learn/issues/10948 pd = importorskip("pandas") df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object) assert_warns_message(DataConversionWarning, "Data with input dtype object were all converted to " "float64.", check_array, df, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df, dtype='numeric', warn_on_dtype=True) with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 check_array(df, dtype='object', warn_on_dtype=True) assert len(record) == 0 # Also check that it raises a warning for mixed dtypes in a DataFrame. df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=np.float64, warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype='numeric', warn_on_dtype=True) assert_warns(DataConversionWarning, check_array, df_mixed, dtype=object, warn_on_dtype=True) # Even with numerical dtypes, a conversion can be made because dtypes are # uniformized throughout the array. df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]]) assert_warns(DataConversionWarning, check_array, df_mixed_numeric, dtype='numeric', warn_on_dtype=True) with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 check_array(df_mixed_numeric.astype(int), dtype='numeric', warn_on_dtype=True) assert len(record) == 0
def test_same_min_max(strategy): warnings.simplefilter("always") X = np.array([[1, -2], [1, -1], [1, 0], [1, 1]]) est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode='ordinal') assert_warns_message( UserWarning, "Feature 0 is constant and will be replaced " "with 0.", est.fit, X) assert est.n_bins_[0] == 1 # replace the feature with zeros Xt = est.transform(X) assert_array_equal(Xt[:, 0], np.zeros(X.shape[0]))
def test_transform_target_regressor_invertible(): X, y = friedman regr = TransformedTargetRegressor(regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log, check_inverse=True) assert_warns_message(UserWarning, "The provided functions or transformer" " are not strictly inverse of each other.", regr.fit, X, y) regr = TransformedTargetRegressor(regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log) regr.set_params(check_inverse=False) assert_no_warnings(regr.fit, X, y)
def test_max_samples_attribute(): X = iris.data clf = IsolationForest().fit(X) assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=500) assert_warns_message(UserWarning, "max_samples will be set to n_samples for estimation", clf.fit, X) assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=0.4).fit(X) assert clf.max_samples_ == 0.4*X.shape[0]
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle(): iris = datasets.load_iris() # TreeNoVersion has no getstate, like pre-0.18 tree = TreeNoVersion().fit(iris.data, iris.target) tree_pickle_noversion = pickle.dumps(tree) assert b"version" not in tree_pickle_noversion message = pickle_error_message.format(estimator="TreeNoVersion", old_version="pre-0.18", current_version=mrex.__version__) # check we got the warning about using pre-0.18 pickle assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_noversion)
def test_linearsvx_loss_penalty_deprecations(): X, y = [[0.0], [1.0]], [0, 1] msg = ("loss='%s' has been deprecated in favor of " "loss='%s' as of 0.16. Backward compatibility" " for the %s will be removed in %s") # LinearSVC # loss l1 --> hinge assert_warns_message(DeprecationWarning, msg % ("l1", "hinge", "loss='l1'", "0.23"), svm.LinearSVC(loss="l1").fit, X, y) # loss l2 --> squared_hinge assert_warns_message(DeprecationWarning, msg % ("l2", "squared_hinge", "loss='l2'", "0.23"), svm.LinearSVC(loss="l2").fit, X, y) # LinearSVR # loss l1 --> epsilon_insensitive assert_warns_message( DeprecationWarning, msg % ("l1", "epsilon_insensitive", "loss='l1'", "0.23"), svm.LinearSVR(loss="l1").fit, X, y) # loss l2 --> squared_epsilon_insensitive assert_warns_message( DeprecationWarning, msg % ("l2", "squared_epsilon_insensitive", "loss='l2'", "0.23"), svm.LinearSVR(loss="l2").fit, X, y)
def test_multi_task_lasso_and_enet(): X, y, X_test, y_test = build_dataset() Y = np.c_[y, y] # Y_test = np.c_[y_test, y_test] clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y) assert 0 < clf.dual_gap_ < 1e-5 assert_array_almost_equal(clf.coef_[0], clf.coef_[1]) clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y) assert 0 < clf.dual_gap_ < 1e-5 assert_array_almost_equal(clf.coef_[0], clf.coef_[1]) clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1) assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)
def test_multilabel_binarizer_unknown_class(): mlb = MultiLabelBinarizer() y = [[1, 2]] Y = np.array([[1, 0], [0, 1]]) w = 'unknown class(es) [0, 4] will be ignored' matrix = assert_warns_message(UserWarning, w, mlb.fit(y).transform, [[4, 1], [2, 0]]) assert_array_equal(matrix, Y) Y = np.array([[1, 0, 0], [0, 1, 0]]) mlb = MultiLabelBinarizer(classes=[1, 2, 3]) matrix = assert_warns_message(UserWarning, w, mlb.fit(y).transform, [[4, 1], [2, 0]]) assert_array_equal(matrix, Y)
def test_warn_ignore_attribute(monkeypatch, gzip_response): data_id = 40966 expected_row_id_msg = "target_column={} has flag is_row_identifier." expected_ignore_msg = "target_column={} has flag is_ignore." _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # single column test assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'), fetch_openml, data_id=data_id, target_column='MouseID', cache=False) assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'), fetch_openml, data_id=data_id, target_column='Genotype', cache=False) # multi column test assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'), fetch_openml, data_id=data_id, target_column=['MouseID', 'class'], cache=False) assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'), fetch_openml, data_id=data_id, target_column=['Genotype', 'class'], cache=False)
def test_randomized_svd_sparse_warnings(): # randomized_svd throws a warning for lil and dok matrix rng = np.random.RandomState(42) X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng) n_components = 5 for cls in (sparse.lil_matrix, sparse.dok_matrix): X = cls(X) assert_warns_message(sparse.SparseEfficiencyWarning, "Calculating SVD of a {} is expensive. " "csr_matrix is more efficient.".format( cls.__name__), randomized_svd, X, n_components, n_iter=1, power_iteration_normalizer='none')
def test_mcd_increasing_det_warning(): # Check that a warning is raised if we observe increasing determinants # during the c_step. In theory the sequence of determinants should be # decreasing. Increasing determinants are likely due to ill-conditioned # covariance matrices that result in poor precision matrices. X = [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [5.0, 3.6, 1.4, 0.2], [4.6, 3.4, 1.4, 0.3], [5.0, 3.4, 1.5, 0.2], [4.4, 2.9, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1], [5.4, 3.7, 1.5, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3.0, 1.4, 0.1], [4.3, 3.0, 1.1, 0.1], [5.1, 3.5, 1.4, 0.3], [5.7, 3.8, 1.7, 0.3], [5.4, 3.4, 1.7, 0.2], [4.6, 3.6, 1.0, 0.2], [5.0, 3.0, 1.6, 0.2], [5.2, 3.5, 1.5, 0.2]] mcd = MinCovDet(random_state=1) assert_warns_message(RuntimeWarning, "Determinant has increased", mcd.fit, X)
def test_deprecated(): assert_warns_message(DeprecationWarning, 'qwerty', MockClass1) assert_warns_message(DeprecationWarning, 'mockclass2_method', MockClass2().method) assert_warns_message(DeprecationWarning, 'deprecated', MockClass3) val = assert_warns_message(DeprecationWarning, 'deprecated', mock_function) assert val == 10
def test_check_ci_warn(): x = [0, 1, 2, 3, 4, 5] y = [0, -1, 2, -3, 4, -5] # Check that we got increasing=False and CI interval warning is_increasing = assert_warns_message(UserWarning, "interval", check_increasing, x, y) assert not is_increasing
def test_gaussian_mixture_fit_convergence_warning(): rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=1) n_components = rand_data.n_components max_iter = 1 for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=1, max_iter=max_iter, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_warns_message( ConvergenceWarning, 'Initialization %d did not converge. ' 'Try different init parameters, ' 'or increase max_iter, tol ' 'or check for degenerate data.' % max_iter, g.fit, X)
def test_less_centers_than_unique_points(): X = np.asarray([[0, 0], [0, 1], [1, 0], [1, 0]]) # last point is duplicated km = KMeans(n_clusters=4).fit(X) # only three distinct points, so only three clusters # can have points assigned to them assert set(km.labels_) == set(range(3)) # k_means should warn that fewer labels than cluster # centers have been used msg = ("Number of distinct clusters (3) found smaller than " "n_clusters (4). Possibly due to duplicate points in X.") assert_warns_message(ConvergenceWarning, msg, k_means, X, sample_weight=None, n_clusters=4)
def test_regressormixin_score_multioutput(): from mrex.linear_model import LinearRegression # no warnings when y_type is continuous X = [[1], [2], [3]] y = [1, 2, 3] reg = LinearRegression().fit(X, y) assert_no_warnings(reg.score, X, y) # warn when y_type is continuous-multioutput y = [[1, 2], [2, 3], [3, 4]] reg = LinearRegression().fit(X, y) msg = ("The default value of multioutput (not exposed in " "score method) will change from 'variance_weighted' " "to 'uniform_average' in 0.23 to keep consistent " "with 'metrics.r2_score'. To specify the default " "value manually and avoid the warning, please " "either call 'metrics.r2_score' directly or make a " "custom scorer with 'metrics.make_scorer' (the " "built-in scorer 'r2' uses " "multioutput='uniform_average').") assert_warns_message(FutureWarning, msg, reg.score, X, y)
def test_fetch_openml_inactive(monkeypatch, gzip_response): # fetch inactive dataset by id data_id = 40675 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) glas2 = assert_warns_message(UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml, data_id=data_id, cache=False) # fetch inactive dataset by name and version assert glas2.data.shape == (163, 9) glas2_by_version = assert_warns_message( UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml, data_id=None, name="glass2", version=1, cache=False) assert int(glas2_by_version.details['id']) == data_id
def test_check_inverse(): X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2)) X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)] for X in X_list: if sparse.issparse(X): accept_sparse = True else: accept_sparse = False trans = FunctionTransformer(func=np.sqrt, inverse_func=np.around, accept_sparse=accept_sparse, check_inverse=True, validate=True) assert_warns_message( UserWarning, "The provided functions are not strictly" " inverse of each other. If you are sure you" " want to proceed regardless, set" " 'check_inverse=False'.", trans.fit, X) trans = FunctionTransformer(func=np.expm1, inverse_func=np.log1p, accept_sparse=accept_sparse, check_inverse=True, validate=True) Xt = assert_no_warnings(trans.fit_transform, X) assert_allclose_dense_sparse(X, trans.inverse_transform(Xt)) # check that we don't check inverse when one of the func or inverse is not # provided. trans = FunctionTransformer(func=np.expm1, inverse_func=None, check_inverse=True, validate=True) assert_no_warnings(trans.fit, X_dense) trans = FunctionTransformer(func=None, inverse_func=np.expm1, check_inverse=True, validate=True) assert_no_warnings(trans.fit, X_dense)