Example #1
0
def test_redundant_bins(strategy, expected_bin_edges):
    X = [[0], [0], [0], [0], [3], [3]]
    kbd = KBinsDiscretizer(n_bins=3, strategy=strategy)
    msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
           "are removed. Consider decreasing the number of bins.")
    assert_warns_message(UserWarning, msg, kbd.fit, X)
    assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges)
Example #2
0
def test_fetch_openml_australian(monkeypatch, gzip_response):
    # sparse dataset
    # Australian is the only sparse dataset that is reasonably small
    # as it is inactive, we need to catch the warning. Due to mocking
    # framework, it is not deactivated in our tests
    data_id = 292
    data_name = 'Australian'
    data_version = 1
    target_column = 'Y'
    # Not all original instances included for space reasons
    expected_observations = 85
    expected_features = 14
    expected_missing = 0
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "Version 1 of dataset Australian is inactive,",
        _fetch_dataset_from_openml,
        **{
            'data_id': data_id,
            'data_name': data_name,
            'data_version': data_version,
            'target_column': target_column,
            'expected_observations': expected_observations,
            'expected_features': expected_features,
            'expected_missing': expected_missing,
            'expect_sparse': True,
            'expected_data_dtype': np.float64,
            'expected_target_dtype': object,
            'compare_default_target': False
        }  # numpy specific check
    )
Example #3
0
def test_fetch_openml_iris(monkeypatch, gzip_response):
    # classification dataset with numeric only columns
    data_id = 61
    data_name = 'iris'
    data_version = 1
    target_column = 'class'
    expected_observations = 150
    expected_features = 4
    expected_missing = 0

    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "Multiple active versions of the dataset matching the name"
        " iris exist. Versions may be fundamentally different, "
        "returning version 1.", _fetch_dataset_from_openml, **{
            'data_id': data_id,
            'data_name': data_name,
            'data_version': data_version,
            'target_column': target_column,
            'expected_observations': expected_observations,
            'expected_features': expected_features,
            'expected_missing': expected_missing,
            'expect_sparse': False,
            'expected_data_dtype': np.float64,
            'expected_target_dtype': object,
            'compare_default_target': True
        })
Example #4
0
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
Example #5
0
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    rng = check_random_state(0)
    n_samples = 10
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    for n_components in [max_components - 1, None, max_components]:
        # if n_components <= min(n_classes - 1, n_features), no warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    for n_components in [
            max_components + 1,
            max(n_features, n_classes - 1) + 1
    ]:
        # if n_components > min(n_classes - 1, n_features), raise warning
        # We test one unit higher than max_components, and then something
        # larger than both n_features and n_classes - 1 to ensure the test
        # works for any value of n_component
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        msg = ("n_components cannot be larger than min(n_features, "
               "n_classes - 1). Using min(n_features, "
               "n_classes - 1) = min(%d, %d - 1) = %d components." %
               (n_features, n_classes, max_components))
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        future_msg = ("In version 0.23, setting n_components > min("
                      "n_features, n_classes - 1) will raise a "
                      "ValueError. You should set n_components to None"
                      " (default), or a value smaller or equal to "
                      "min(n_features, n_classes - 1).")
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
Example #6
0
def test_deprecated_calinski_harabaz_score():
    depr_message = ("Function 'calinski_harabaz_score' has been renamed "
                    "to 'calinski_harabasz_score' "
                    "and will be removed in version 0.23.")
    assert_warns_message(DeprecationWarning, depr_message,
                         calinski_harabaz_score, np.ones(
                             (10, 2)), [0] * 5 + [1] * 5)
Example #7
0
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=mrex.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)
Example #8
0
def test_overrided_gram_matrix():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    Gram = X.T.dot(X)
    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram)
    assert_warns_message(
        UserWarning, "Gram matrix was provided but X was centered"
        " to fit intercept, "
        "or X was normalized : recomputing Gram matrix.", clf.fit, X, y)
Example #9
0
def test_percentile_numeric_stability():
    X = np.array([0.05, 0.05, 0.95]).reshape(-1, 1)
    bin_edges = np.array([0.05, 0.23, 0.41, 0.59, 0.77, 0.95])
    Xt = np.array([0, 0, 4]).reshape(-1, 1)
    kbd = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile')
    msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
           "are removed. Consider decreasing the number of bins.")
    assert_warns_message(UserWarning, msg, kbd.fit, X)
    assert_array_almost_equal(kbd.bin_edges_[0], bin_edges)
    assert_array_almost_equal(kbd.transform(X), Xt)
Example #10
0
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML raised a warning on the dataset. It might be unusable. "
        "Warning:",
        fetch_openml,
        data_id=data_id,
        cache=False)
Example #11
0
def test_n_neighbors_attribute():
    X = iris.data
    clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
    assert clf.n_neighbors_ == X.shape[0] - 1

    clf = neighbors.LocalOutlierFactor(n_neighbors=500)
    assert_warns_message(UserWarning,
                         "n_neighbors will be set to (n_samples - 1)", clf.fit,
                         X)
    assert clf.n_neighbors_ == X.shape[0] - 1
Example #12
0
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:",
        fetch_openml,
        data_id=data_id,
        cache=False)
Example #13
0
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array,
                         df,
                         dtype=np.float64,
                         warn_on_dtype=True)
    assert_warns(DataConversionWarning,
                 check_array,
                 df,
                 dtype='numeric',
                 warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning,
                 check_array,
                 df_mixed,
                 dtype=np.float64,
                 warn_on_dtype=True)
    assert_warns(DataConversionWarning,
                 check_array,
                 df_mixed,
                 dtype='numeric',
                 warn_on_dtype=True)
    assert_warns(DataConversionWarning,
                 check_array,
                 df_mixed,
                 dtype=object,
                 warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning,
                 check_array,
                 df_mixed_numeric,
                 dtype='numeric',
                 warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric',
                    warn_on_dtype=True)
    assert len(record) == 0
Example #14
0
def test_same_min_max(strategy):
    warnings.simplefilter("always")
    X = np.array([[1, -2], [1, -1], [1, 0], [1, 1]])
    est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode='ordinal')
    assert_warns_message(
        UserWarning, "Feature 0 is constant and will be replaced "
        "with 0.", est.fit, X)
    assert est.n_bins_[0] == 1
    # replace the feature with zeros
    Xt = est.transform(X)
    assert_array_equal(Xt[:, 0], np.zeros(X.shape[0]))
Example #15
0
def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log,
                                      check_inverse=True)
    assert_warns_message(UserWarning, "The provided functions or transformer"
                         " are not strictly inverse of each other.",
                         regr.fit, X, y)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log)
    regr.set_params(check_inverse=False)
    assert_no_warnings(regr.fit, X, y)
Example #16
0
def test_max_samples_attribute():
    X = iris.data
    clf = IsolationForest().fit(X)
    assert clf.max_samples_ == X.shape[0]

    clf = IsolationForest(max_samples=500)
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         clf.fit, X)
    assert clf.max_samples_ == X.shape[0]

    clf = IsolationForest(max_samples=0.4).fit(X)
    assert clf.max_samples_ == 0.4*X.shape[0]
Example #17
0
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert b"version" not in tree_pickle_noversion
    message = pickle_error_message.format(estimator="TreeNoVersion",
                                          old_version="pre-0.18",
                                          current_version=mrex.__version__)
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)
Example #18
0
def test_linearsvx_loss_penalty_deprecations():
    X, y = [[0.0], [1.0]], [0, 1]

    msg = ("loss='%s' has been deprecated in favor of "
           "loss='%s' as of 0.16. Backward compatibility"
           " for the %s will be removed in %s")

    # LinearSVC
    # loss l1 --> hinge
    assert_warns_message(DeprecationWarning,
                         msg % ("l1", "hinge", "loss='l1'", "0.23"),
                         svm.LinearSVC(loss="l1").fit, X, y)

    # loss l2 --> squared_hinge
    assert_warns_message(DeprecationWarning,
                         msg % ("l2", "squared_hinge", "loss='l2'", "0.23"),
                         svm.LinearSVC(loss="l2").fit, X, y)

    # LinearSVR
    # loss l1 --> epsilon_insensitive
    assert_warns_message(
        DeprecationWarning,
        msg % ("l1", "epsilon_insensitive", "loss='l1'", "0.23"),
        svm.LinearSVR(loss="l1").fit, X, y)

    # loss l2 --> squared_epsilon_insensitive
    assert_warns_message(
        DeprecationWarning,
        msg % ("l2", "squared_epsilon_insensitive", "loss='l2'", "0.23"),
        svm.LinearSVR(loss="l2").fit, X, y)
Example #19
0
def test_multi_task_lasso_and_enet():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    # Y_test = np.c_[y_test, y_test]
    clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)
Example #20
0
def test_multilabel_binarizer_unknown_class():
    mlb = MultiLabelBinarizer()
    y = [[1, 2]]
    Y = np.array([[1, 0], [0, 1]])
    w = 'unknown class(es) [0, 4] will be ignored'
    matrix = assert_warns_message(UserWarning, w,
                                  mlb.fit(y).transform, [[4, 1], [2, 0]])
    assert_array_equal(matrix, Y)

    Y = np.array([[1, 0, 0], [0, 1, 0]])
    mlb = MultiLabelBinarizer(classes=[1, 2, 3])
    matrix = assert_warns_message(UserWarning, w,
                                  mlb.fit(y).transform, [[4, 1], [2, 0]])
    assert_array_equal(matrix, Y)
Example #21
0
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_warns_message(UserWarning,
                         expected_row_id_msg.format('MouseID'),
                         fetch_openml,
                         data_id=data_id,
                         target_column='MouseID',
                         cache=False)
    assert_warns_message(UserWarning,
                         expected_ignore_msg.format('Genotype'),
                         fetch_openml,
                         data_id=data_id,
                         target_column='Genotype',
                         cache=False)
    # multi column test
    assert_warns_message(UserWarning,
                         expected_row_id_msg.format('MouseID'),
                         fetch_openml,
                         data_id=data_id,
                         target_column=['MouseID', 'class'],
                         cache=False)
    assert_warns_message(UserWarning,
                         expected_ignore_msg.format('Genotype'),
                         fetch_openml,
                         data_id=data_id,
                         target_column=['Genotype', 'class'],
                         cache=False)
Example #22
0
def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrix
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
    n_components = 5
    for cls in (sparse.lil_matrix, sparse.dok_matrix):
        X = cls(X)
        assert_warns_message(sparse.SparseEfficiencyWarning,
                             "Calculating SVD of a {} is expensive. "
                             "csr_matrix is more efficient.".format(
                                 cls.__name__),
                             randomized_svd,
                             X,
                             n_components,
                             n_iter=1,
                             power_iteration_normalizer='none')
Example #23
0
def test_mcd_increasing_det_warning():
    # Check that a warning is raised if we observe increasing determinants
    # during the c_step. In theory the sequence of determinants should be
    # decreasing. Increasing determinants are likely due to ill-conditioned
    # covariance matrices that result in poor precision matrices.

    X = [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2],
         [4.6, 3.1, 1.5, 0.2], [5.0, 3.6, 1.4, 0.2], [4.6, 3.4, 1.4, 0.3],
         [5.0, 3.4, 1.5, 0.2], [4.4, 2.9, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1],
         [5.4, 3.7, 1.5, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3.0, 1.4, 0.1],
         [4.3, 3.0, 1.1, 0.1], [5.1, 3.5, 1.4, 0.3], [5.7, 3.8, 1.7, 0.3],
         [5.4, 3.4, 1.7, 0.2], [4.6, 3.6, 1.0, 0.2], [5.0, 3.0, 1.6, 0.2],
         [5.2, 3.5, 1.5, 0.2]]

    mcd = MinCovDet(random_state=1)
    assert_warns_message(RuntimeWarning, "Determinant has increased", mcd.fit,
                         X)
Example #24
0
def test_deprecated():
    assert_warns_message(DeprecationWarning, 'qwerty', MockClass1)
    assert_warns_message(DeprecationWarning, 'mockclass2_method',
                         MockClass2().method)
    assert_warns_message(DeprecationWarning, 'deprecated', MockClass3)
    val = assert_warns_message(DeprecationWarning, 'deprecated', mock_function)
    assert val == 10
Example #25
0
def test_check_ci_warn():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, 2, -3, 4, -5]

    # Check that we got increasing=False and CI interval warning
    is_increasing = assert_warns_message(UserWarning, "interval",
                                         check_increasing, x, y)

    assert not is_increasing
Example #26
0
def test_gaussian_mixture_fit_convergence_warning():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=1)
    n_components = rand_data.n_components
    max_iter = 1
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=1,
                            max_iter=max_iter,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type)
        assert_warns_message(
            ConvergenceWarning, 'Initialization %d did not converge. '
            'Try different init parameters, '
            'or increase max_iter, tol '
            'or check for degenerate data.' % max_iter, g.fit, X)
Example #27
0
def test_less_centers_than_unique_points():
    X = np.asarray([[0, 0], [0, 1], [1, 0], [1,
                                             0]])  # last point is duplicated

    km = KMeans(n_clusters=4).fit(X)

    # only three distinct points, so only three clusters
    # can have points assigned to them
    assert set(km.labels_) == set(range(3))

    # k_means should warn that fewer labels than cluster
    # centers have been used
    msg = ("Number of distinct clusters (3) found smaller than "
           "n_clusters (4). Possibly due to duplicate points in X.")
    assert_warns_message(ConvergenceWarning,
                         msg,
                         k_means,
                         X,
                         sample_weight=None,
                         n_clusters=4)
Example #28
0
def test_regressormixin_score_multioutput():
    from mrex.linear_model import LinearRegression
    # no warnings when y_type is continuous
    X = [[1], [2], [3]]
    y = [1, 2, 3]
    reg = LinearRegression().fit(X, y)
    assert_no_warnings(reg.score, X, y)
    # warn when y_type is continuous-multioutput
    y = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To specify the default "
           "value manually and avoid the warning, please "
           "either call 'metrics.r2_score' directly or make a "
           "custom scorer with 'metrics.make_scorer' (the "
           "built-in scorer 'r2' uses "
           "multioutput='uniform_average').")
    assert_warns_message(FutureWarning, msg, reg.score, X, y)
Example #29
0
def test_fetch_openml_inactive(monkeypatch, gzip_response):
    # fetch inactive dataset by id
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    glas2 = assert_warns_message(UserWarning,
                                 "Version 1 of dataset glass2 is inactive,",
                                 fetch_openml,
                                 data_id=data_id,
                                 cache=False)
    # fetch inactive dataset by name and version
    assert glas2.data.shape == (163, 9)
    glas2_by_version = assert_warns_message(
        UserWarning,
        "Version 1 of dataset glass2 is inactive,",
        fetch_openml,
        data_id=None,
        name="glass2",
        version=1,
        cache=False)
    assert int(glas2_by_version.details['id']) == data_id
Example #30
0
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))

    X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)]

    for X in X_list:
        if sparse.issparse(X):
            accept_sparse = True
        else:
            accept_sparse = False
        trans = FunctionTransformer(func=np.sqrt,
                                    inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        assert_warns_message(
            UserWarning, "The provided functions are not strictly"
            " inverse of each other. If you are sure you"
            " want to proceed regardless, set"
            " 'check_inverse=False'.", trans.fit, X)

        trans = FunctionTransformer(func=np.expm1,
                                    inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    trans = FunctionTransformer(func=np.expm1,
                                inverse_func=None,
                                check_inverse=True,
                                validate=True)
    assert_no_warnings(trans.fit, X_dense)
    trans = FunctionTransformer(func=None,
                                inverse_func=np.expm1,
                                check_inverse=True,
                                validate=True)
    assert_no_warnings(trans.fit, X_dense)