def test_deprecated_grid_search_iid(self):
    depr_message = (
        "The default of the `iid` parameter will change from True "
        "to False in version 0.22")
    X, y = make_blobs(n_samples=54, random_state=0, centers=2)
    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]},
                        cv=3)
    # no warning with equally sized test sets
    assert_no_warnings(grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]},
                        cv=5)
    # warning because 54 % 5 != 0
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]},
                        cv=2)
    # warning because stratification into two classes and 27 % 2 != 0
    assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

    grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                        param_grid={'C': [10]},
                        cv=KFold(2))
    # no warning because no stratification and 54 % 2 == 0
    assert_no_warnings(grid.fit, X, y)
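
The assert_no_warnings and assert_warns_message helpers used throughout these examples come from sklearn.utils.testing. As a rough sketch of the pattern they are built on (the helper names below are illustrative assumptions, not the library implementation), both wrap the call in warnings.catch_warnings(record=True) and then inspect the recorded warnings:

import warnings


def _no_warnings(func, *args, **kwargs):
    # Call func and fail if any warning was emitted; return func's result.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        result = func(*args, **kwargs)
    if record:
        raise AssertionError('Got unexpected warning: %s' % record[0].message)
    return result


def _warns_message(warning_class, message, func, *args, **kwargs):
    # Call func and fail unless a warning of warning_class whose text
    # contains `message` was emitted; return func's result.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        result = func(*args, **kwargs)
    matched = [w for w in record
               if issubclass(w.category, warning_class)
               and message in str(w.message)]
    if not matched:
        raise AssertionError('Expected %s containing %r'
                             % (warning_class.__name__, message))
    return result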
Example #2
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100,
                               n_classes=2,
                               flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(),
                                              grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # ChangedBehaviorWarning occurred previously (prior to #9005)
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_no_warnings(search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
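
LinearSVCNoScore is a helper defined in scikit-learn's search tests rather than in the public API. A minimal stand-in consistent with how it is used here (an assumption, not necessarily the exact upstream definition) is a LinearSVC subclass whose score attribute raises AttributeError, forcing GridSearchCV to fall back on the scoring parameter:

from sklearn.svm import LinearSVC


class LinearSVCNoScore(LinearSVC):
    """A LinearSVC classifier that has no score method."""
    @property
    def score(self):
        raise AttributeError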
Example #3
def test_one_hot_encoder_deprecationwarnings():
    for X in [[[3, 2, 1], [0, 1, 1]], [[3., 2., 1.], [0., 1., 1.]]]:
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer", enc.fit, X)
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer",
                             enc.fit_transform, X)

        # check it still works correctly as well
        with ignore_warnings(category=FutureWarning):
            X_trans = enc.fit_transform(X).toarray()
        res = [[0., 1., 0., 1., 1.], [1., 0., 1., 0., 1.]]
        assert_array_equal(X_trans, res)

        # check deprecated attributes
        assert_warns(DeprecationWarning, lambda: enc.active_features_)
        assert_warns(DeprecationWarning, lambda: enc.feature_indices_)
        assert_warns(DeprecationWarning, lambda: enc.n_values_)

        # check no warning is raised if keyword is specified
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit, X)
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit_transform, X)
        X_trans = enc.fit_transform(X).toarray()
        assert_array_equal(X_trans, res)

        # check there is also a warning if the default is passed
        enc = OneHotEncoder(n_values='auto', handle_unknown='ignore')
        assert_warns(DeprecationWarning, enc.fit, X)

    X = np.array([['cat1', 'cat2']], dtype=object).T
    enc = OneHotEncoder(categorical_features='all')
    assert_warns(DeprecationWarning, enc.fit, X)
Example #4
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True)

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df_mixed_numeric.astype(int),
                       dtype='numeric', warn_on_dtype=True)
def test_feature_agglomeration():
    n_clusters = 1
    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)

    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                      pooling_func=np.mean)
    agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
                                        pooling_func=np.median)
    assert_no_warnings(agglo_mean.fit, X)
    assert_no_warnings(agglo_median.fit, X)
    assert_true(np.size(np.unique(agglo_mean.labels_)) == n_clusters)
    assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
    assert_true(np.size(agglo_mean.labels_) == X.shape[1])
    assert_true(np.size(agglo_median.labels_) == X.shape[1])

    # Test transform
    Xt_mean = agglo_mean.transform(X)
    Xt_median = agglo_median.transform(X)
    assert_true(Xt_mean.shape[1] == n_clusters)
    assert_true(Xt_median.shape[1] == n_clusters)
    assert_true(Xt_mean == np.array([1 / 3.]))
    assert_true(Xt_median == np.array([0.]))

    # Test inverse transform
    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
    X_full_median = agglo_median.inverse_transform(Xt_median)
    assert_true(np.unique(X_full_mean[0]).size == n_clusters)
    assert_true(np.unique(X_full_median[0]).size == n_clusters)

    assert_array_almost_equal(agglo_mean.transform(X_full_mean),
                              Xt_mean)
    assert_array_almost_equal(agglo_median.transform(X_full_median),
                              Xt_median)
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_warns(ChangedBehaviorWarning,
                                  search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_warns(ChangedBehaviorWarning,
                             search_auc.score, X, y)
    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
Example #7
def test_skope_rules_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data
    y = iris.target
    y = (y != 0)

    # Test similarity_thres:
    assert_raises(ValueError, SkopeRules(similarity_thres=2).fit, X, y)
    assert_raises(ValueError, SkopeRules(similarity_thres=0).fit, X, y)

    # Test max_samples
    assert_raises(ValueError, SkopeRules(max_samples=-1).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=0.0).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=2.0).fit, X, y)
    # explicitly setting max_samples > n_samples should result in a warning.
    assert_warns_message(
        UserWarning, "max_samples will be set to n_samples for estimation",
        SkopeRules(max_samples=1000).fit, X, y)
    assert_no_warnings(SkopeRules(max_samples=np.int64(2)).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples='foobar').fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=1.5).fit, X, y)
    assert_raises(ValueError, SkopeRules().fit(X, y).predict, X[:, 1:])
    assert_raises(ValueError,
                  SkopeRules().fit(X, y).decision_function, X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).rules_vote, X[:, 1:])
    assert_raises(ValueError,
                  SkopeRules().fit(X, y).separate_rules_score, X[:, 1:])
def test_one_hot_encoder_invalid_params():
    enc = OneHotEncoder(drop='second')
    assert_raises_regex(
        ValueError,
        "Wrong input for parameter `drop`.",
        enc.fit, [["Male"], ["Female"]])

    enc = OneHotEncoder(handle_unknown='ignore', drop='first')
    assert_raises_regex(
        ValueError,
        "`handle_unknown` must be 'error'",
        enc.fit, [["Male"], ["Female"]])

    enc = OneHotEncoder(drop='first')
    assert_raises_regex(
        ValueError,
        "The handling of integer data will change in version",
        enc.fit, [[1], [2]])

    enc = OneHotEncoder(drop='first', categories='auto')
    assert_no_warnings(enc.fit_transform, [[1], [2]])

    enc = OneHotEncoder(drop=np.asarray('b', dtype=object))
    assert_raises_regex(
        ValueError,
        "Wrong input for parameter `drop`.",
        enc.fit, [['abc', 2, 55], ['def', 1, 55], ['def', 3, 59]])

    enc = OneHotEncoder(drop=['ghi', 3, 59])
    assert_raises_regex(
        ValueError,
        "The following categories were supposed",
        enc.fit, [['abc', 2, 55], ['def', 1, 55], ['def', 3, 59]])
Example #9
def test_no_empty_slice_warning():
    # test if we avoid numpy warnings for computing over empty arrays
    n_components = 10
    n_features = n_components + 2  # anything > n_comps triggered it in 0.16
    X = np.random.uniform(-1, 1, size=(n_components, n_features))
    pca = PCA(n_components=n_components)
    assert_no_warnings(pca.fit, X)
def test_mnb_prior_unobserved_targets():
    # test smoothing of prior for yet unobserved targets

    # Create toy training data
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])

    clf = MultinomialNB()

    assert_no_warnings(
        clf.partial_fit, X, y, classes=[0, 1, 2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 0

    # add a training example with previously unobserved class
    assert_no_warnings(
        clf.partial_fit, [[1, 1]], [2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 2
Example #11
def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from "
               "version {0} when using version {1}. This might lead to "
               "breaking code or invalid results. "
               "Use at your own risk.".format("something",
                                              sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)

    # check that not including any version also works:
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = message.replace("something", "pre-0.18")
    message = message.replace("DecisionTreeClassifier", "TreeNoVersion")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)

    # check that no warning is raised for external estimators
    TreeNoVersion.__module__ = "notsklearn"
    assert_no_warnings(pickle.loads, tree_pickle_noversion)
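
TreeNoVersion is likewise a test-module helper: a DecisionTreeClassifier whose pickled state omits the version metadata that BaseEstimator.__getstate__ normally records. A minimal stand-in consistent with that usage (an assumed re-creation, not necessarily the exact upstream class):

from sklearn.tree import DecisionTreeClassifier


class TreeNoVersion(DecisionTreeClassifier):
    # Bypass BaseEstimator.__getstate__, which stores the sklearn version
    # in the pickled state, to emulate pre-0.18 pickles without version
    # metadata.
    def __getstate__(self):
        return self.__dict__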
Example #12
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    rng = check_random_state(0)
    n_samples = 10
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    for n_components in [max_components - 1, None, max_components]:
        # if n_components <= min(n_classes - 1, n_features), no warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    for n_components in [max_components + 1,
                         max(n_features, n_classes - 1) + 1]:
        # if n_components > min(n_classes - 1, n_features), raise warning
        # We test one unit higher than max_components, and then something
        # larger than both n_features and n_classes - 1 to ensure the test
        # works for any value of n_component
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        msg = ("n_components cannot be larger than min(n_features, "
               "n_classes - 1). Using min(n_features, "
               "n_classes - 1) = min(%d, %d - 1) = %d components." %
               (n_features, n_classes, max_components))
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        future_msg = ("In version 0.23, setting n_components > min("
                      "n_features, n_classes - 1) will raise a "
                      "ValueError. You should set n_components to None"
                      " (default), or a value smaller or equal to "
                      "min(n_features, n_classes - 1).")
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)

def test_return_train_score_warn_0_19(self):
    # Test that warnings are raised. Will be removed in 0.21

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    grid = {'C': [1, 2]}

    estimators = [GridSearchCV(LinearSVC(random_state=0), grid),
                  RandomizedSearchCV(LinearSVC(random_state=0), grid,
                                     n_iter=2)]

    result = {}
    for estimator in estimators:
        for val in [True, False, 'warn']:
            estimator.set_params(return_train_score=val)
            result[val] = assert_no_warnings(estimator.fit, X, y).cv_results_

    train_keys = ['split0_train_score', 'split1_train_score',
                  'split2_train_score', 'mean_train_score', 'std_train_score']
    for key in train_keys:
        msg = (
            'You are accessing a training score ({!r}), '
            'which will not be available by default '
            'any more in 0.21. If you need training scores, '
            'please set return_train_score=True').format(key)
        train_score = assert_warns_message(FutureWarning, msg,
                                           result['warn'].get, key)
        assert np.allclose(train_score, result[True][key])
        assert key not in result[False]

    for key in result['warn']:
        if key not in train_keys:
            assert_no_warnings(result['warn'].get, key)
def test_no_warning_for_zero_mse():
    # LassoLarsIC should not warn for log of zero MSE.
    y = np.arange(10, dtype=float)
    X = y.reshape(-1, 1)
    lars = linear_model.LassoLarsIC(normalize=False)
    assert_no_warnings(lars.fit, X, y)
    assert_true(np.any(np.isinf(lars.criterion_)))
Example #17
def test_vectorizer_stop_words_inconsistent():
    if PY2:
        lstr = "[u'and', u'll', u've']"
    else:
        lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    for vec in [CountVectorizer(),
                TfidfVectorizer(), HashingVectorizer()]:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             ['hello world'])
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

    # Only one warning per stop list
    assert_no_warnings(vec.fit_transform, ['hello world'])
    assert _check_stop_words_consistency(vec) is None

    # Test caching of inconsistency assessment
    vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
    assert_warns_message(UserWarning, message, vec.fit_transform,
                         ['hello world'])
Example #18
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100,
                               n_classes=2,
                               flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(),
                                              grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_warns(ChangedBehaviorWarning,
                                  search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_warns(ChangedBehaviorWarning, search_auc.score, X, y)
    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # ChangedBehaviorWarning occurred previously (prior to #9005)
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score,
                                            X, y)
    score_auc = assert_no_warnings(search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from version "
               "{0} when using version {1}. This might lead to breaking "
               "code or invalid results. Use at your own risk.".format(
                   "something", sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)

    # check that not including any version also works:
    # don't do this at home, kids
    from sklearn.base import BaseEstimator
    old_getstate = BaseEstimator.__getstate__
    del BaseEstimator.__getstate__
    # tree lost its getstate, like pre-0.18
    assert_false(hasattr(tree, "__getstate__"))
    tree_pickle_noversion = pickle.dumps(tree)
    message = message.replace("something", "pre-0.18")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_noversion)
    BaseEstimator.__getstate__ = old_getstate

    # check that no warning is raised for external estimators
    DecisionTreeClassifier.__module__ = "notsklearn"
    assert_no_warnings(pickle.loads, tree_pickle_noversion)
Example #24
def test_prf_average_compat():
    # Ensure warning if f1_score et al.'s average is implicit for multiclass
    y_true = [1, 2, 3, 3]
    y_pred = [1, 2, 3, 1]
    y_true_bin = [0, 1, 1]
    y_pred_bin = [0, 1, 0]

    for metric in [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]:
        score = assert_warns(DeprecationWarning, metric, y_true, y_pred)
        score_weighted = assert_no_warnings(metric, y_true, y_pred,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default')

        # check binary passes without warning
        assert_no_warnings(metric, y_true_bin, y_pred_bin)

        # but binary with pos_label=None should behave like multiclass
        score = assert_warns(DeprecationWarning, metric,
                             y_true_bin, y_pred_bin, pos_label=None)
        score_weighted = assert_no_warnings(metric, y_true_bin, y_pred_bin,
                                            pos_label=None, average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default with '
                     'binary data and pos_label=None')
Example #25
def test_gamma_auto():
    X, y = [[0.0, 1.2], [1.0, 1.3]], [0, 1]

    msg = ("The default value of gamma will change from 'auto' to 'scale' in "
           "version 0.22 to account better for unscaled features. Set gamma "
           "explicitly to 'auto' or 'scale' to avoid this warning.")

    assert_warns_message(FutureWarning, msg, svm.SVC().fit, X, y)
    assert_no_warnings(svm.SVC(kernel='linear').fit, X, y)
    assert_no_warnings(svm.SVC(kernel='precomputed').fit, X, y)
Example #26
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup
Example #28
def test_gamma_auto():
    X, y = [[0.0, 1.2], [1.0, 1.3]], [0, 1]

    msg = ("The default value of gamma will change from 'auto' to 'scale' in "
           "version 0.22 to account better for unscaled features. Set gamma "
           "explicitly to 'auto' or 'scale' to avoid this warning.")

    assert_warns_message(FutureWarning, msg,
                         svm.SVC().fit, X, y)
    assert_no_warnings(svm.SVC(kernel='linear').fit, X, y)
    assert_no_warnings(svm.SVC(kernel='precomputed').fit, X, y)
Example #29
def test_gamma_scale():
    X, y = [[0.], [1.]], [0, 1]

    clf = svm.SVC(gamma='scale')
    assert_no_warnings(clf.fit, X, y)
    assert_equal(clf._gamma, 2.)

    # X_std ~= 1 shouldn't raise warning, for when
    # gamma is not explicitly set.
    X, y = [[1, 2], [3, 2 * np.sqrt(6) / 3 + 2]], [0, 1]
    assert_no_warnings(clf.fit, X, y)
Example #30
def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log,
                                      check_inverse=True)
    assert_warns_message(UserWarning, "The provided functions or transformer"
                         " are not strictly inverse of each other.",
                         regr.fit, X, y)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log)
    regr.set_params(check_inverse=False)
    assert_no_warnings(regr.fit, X, y)
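
The friedman fixture used above is a module-level dataset in the test file; it is assumed here to be the Friedman #1 regression problem, e.g. along the lines of:

from sklearn.datasets import make_friedman1

# Assumed module-level fixture: an (X, y) tuple for the Friedman #1 problem.
friedman = make_friedman1(random_state=0)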
def test_recall_warnings():
    assert_no_warnings(recall_score,
                       np.array([[1, 1], [1, 1]]),
                       np.array([[0, 0], [0, 0]]),
                       average='micro')
    clean_warning_registry()
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        recall_score(np.array([[0, 0], [0, 0]]),
                     np.array([[1, 1], [1, 1]]),
                     average='micro')
        assert_equal(str(record.pop().message),
                     'Recall is ill-defined and '
                     'being set to 0.0 due to no true samples.')
Example #33
def test_recall_warnings():
    assert_no_warnings(recall_score,
                       np.array([[1, 1], [1, 1]]),
                       np.array([[0, 0], [0, 0]]),
                       average='micro')

    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        recall_score(np.array([[0, 0], [0, 0]]),
                     np.array([[1, 1], [1, 1]]),
                     average='micro')
        assert_equal(
            str(record.pop().message), 'Recall is ill-defined and '
            'being set to 0.0 due to no true samples.')
Example #34
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples
    assert_raises(ValueError, IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=2.0).fit, X)
    # The dataset has fewer than 256 samples; explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly, there should be no warning.
    assert_warns_message(
        UserWarning, "max_samples will be set to n_samples for estimation",
        IsolationForest(max_samples=1000).fit, X)
    assert_no_warnings(IsolationForest(max_samples='auto').fit, X)
    assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X)
def test_precision_warnings():
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')

        precision_score(np.array([[1, 1], [1, 1]]),
                        np.array([[0, 0], [0, 0]]),
                        average='micro')
        assert_equal(str(record.pop().message),
                     'Precision is ill-defined and '
                     'being set to 0.0 due to no predicted samples.')

    assert_no_warnings(precision_score,
                       np.array([[0, 0], [0, 0]]),
                       np.array([[1, 1], [1, 1]]),
                       average='micro')
Example #36
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameters checking is public function
    nnmf = non_negative_factorization
    assert_no_warnings(nnmf, A, A, A, np.int64(1))
    msg = "Number of components must be a positive integer; " "got (n_components=1.5)"
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5)
    msg = "Number of components must be a positive integer; " "got (n_components='2')"
    assert_raise_message(ValueError, msg, nnmf, A, A, A, "2")
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, "custom")
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, "custom")
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, "custom")
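
assert_raise_message, used above alongside assert_no_warnings, follows the same call-and-check pattern for exceptions: invoke the callable, catch the expected exception type, and verify its message. A rough illustrative sketch (an assumption, not the library code):

def _raise_message(exception, message, func, *args, **kwargs):
    # Fail unless func raises `exception` and its text contains `message`.
    try:
        func(*args, **kwargs)
    except exception as exc:
        if message not in str(exc):
            raise AssertionError('Expected %r in %r' % (message, str(exc)))
    else:
        raise AssertionError('%s not raised' % exception.__name__)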
Example #37
def test_precision_warnings():
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')

        precision_score(np.array([[1, 1], [1, 1]]),
                        np.array([[0, 0], [0, 0]]),
                        average='micro')
        assert_equal(
            str(record.pop().message), 'Precision is ill-defined and '
            'being set to 0.0 due to no predicted samples.')

    assert_no_warnings(precision_score,
                       np.array([[0, 0], [0, 0]]),
                       np.array([[1, 1], [1, 1]]),
                       average='micro')
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameters checking is public function
    nnmf = non_negative_factorization
    assert_no_warnings(nnmf, A, A, A, np.int64(1))
    msg = "Number of components must be a positive integer; got (n_components=1.5)"
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5)
    msg = "Number of components must be a positive integer; got (n_components='2')"
    assert_raise_message(ValueError, msg, nnmf, A, A, A, '2')
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, 'custom')
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, 'custom')
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom')
Example #39
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples
    assert_raises(ValueError, IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=2.0).fit, X)
    # The dataset has fewer than 256 samples; explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly, there should be no warning.
    assert_warns_message(
        UserWarning, "max_samples will be set to n_samples for estimation",
        IsolationForest(max_samples=1000).fit, X)
    assert_no_warnings(IsolationForest(max_samples="auto").fit, X)
    assert_raises(ValueError, IsolationForest(max_samples="foobar").fit, X)
Example #40
def test_check_increasing_up_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert is_increasing
Example #41
def test_check_increasing_up():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1.5, 2.77, 8.99, 8.99, 50]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert is_increasing
Example #42
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(UserWarning,
                                                          "mutually equal",
                                                          affinity_propagation,
                                                          S,
                                                          preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(UserWarning,
                                                          "mutually equal",
                                                          affinity_propagation,
                                                          S,
                                                          preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(affinity_propagation,
                                                        S,
                                                        preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
def test_check_increasing_down_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, -2, -3, -4, -5]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_false(is_increasing)
def test_check_increasing_up():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1.5, 2.77, 8.99, 8.99, 50]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_true(is_increasing)
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
Example #46
def test_check_increasing_down():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1.5, -2.77, -8.99, -8.99, -50]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert not is_increasing
Example #47
def test_check_increasing_down_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1, -2, -3, -4, -5]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert not is_increasing
def test_check_increasing_up_extreme():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, 1, 2, 3, 4, 5]

    # Check that we got increasing=True and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_true(is_increasing)
def test_prf_average_compat():
    """Ensure warning if f1_score et al.'s average is implicit for multiclass
    """
    y_true = [1, 2, 3, 3]
    y_pred = [1, 2, 3, 1]

    for metric in [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]:
        score = assert_warns(DeprecationWarning, metric, y_true, y_pred)
        score_weighted = assert_no_warnings(metric, y_true, y_pred,
                                            average='weighted')
        assert_equal(score, score_weighted,
                     'average does not act like "weighted" by default')

        # check binary passes without warning
        assert_no_warnings(metric, [0, 1, 1], [0, 1, 0])
def test_check_increasing_down():
    x = [0, 1, 2, 3, 4, 5]
    y = [0, -1.5, -2.77, -8.99, -8.99, -50]

    # Check that we got increasing=False and no warnings
    is_increasing = assert_no_warnings(check_increasing, x, y)
    assert_false(is_increasing)
def test_regressormixin_score_multioutput():
    from sklearn.linear_model import LinearRegression
    # no warnings when y_type is continuous
    X = [[1], [2], [3]]
    y = [1, 2, 3]
    reg = LinearRegression().fit(X, y)
    assert_no_warnings(reg.score, X, y)
    # warn when y_type is continuous-multioutput
    y = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To use the new default, "
           "please either call 'metrics.r2_score' directly or "
           "make a custom scorer with 'metrics.make_scorer'.")
    assert_warns_message(FutureWarning, msg, reg.score, X, y)
def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert_equal(mdl.n_iter_, mdl.max_iter)

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert_equal(mdl.n_iter_, mdl.max_iter)

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)
def test_sensitivity_specificity_score_binary():
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    sen, spe, sup = sensitivity_specificity_support(
        y_true, y_pred, average=None)
    assert_allclose(sen, [0.88, 0.68], rtol=R_TOL)
    assert_allclose(spe, [0.68, 0.88], rtol=R_TOL)
    assert_array_equal(sup, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs in ({}, {'average': 'binary'}):
        sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs)
        assert sen == pytest.approx(0.68, rel=R_TOL)

        spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs)
        assert spe == pytest.approx(0.88, rel=R_TOL)
Example #56
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored)