Example #1
0
def test_constant_strategy_exceptions(y, params, err_msg):
    # Fitting a "constant" dummy classifier built from ``params`` on ``y``
    # must raise a ValueError whose message matches ``err_msg``.
    features = [[0] for _ in range(4)]
    estimator = DummyClassifier(strategy="constant", **params)

    with pytest.raises(ValueError, match=err_msg):
        estimator.fit(features, y)
Example #2
0
def test_dummy_classifier_on_3D_array():
    # A stratified dummy classifier is fitted on a 3-D array of strings.
    # Only one class is present in the target, so every prediction must be
    # that class and every probability row must be [1].
    samples = np.array([[['foo']], [['bar']], [['baz']]])
    targets = [2, 2, 2]

    model = DummyClassifier(strategy="stratified")
    model.fit(samples, targets)

    predicted_labels = model.predict(samples)
    predicted_probas = model.predict_proba(samples)

    assert_array_equal(predicted_labels, [2, 2, 2])
    assert_array_equal(predicted_probas, [[1], [1], [1]])
Example #3
0
def test_uniform_strategy():
    # With the "uniform" strategy each of the two labels should be predicted
    # roughly half of the time (checked to one decimal place).
    labels = [1, 2, 1, 1]
    clf = DummyClassifier(strategy="uniform", random_state=0)
    clf.fit([[0]] * 4, labels)  # features are ignored

    n_queries = 500
    queries = [[0]] * n_queries
    frequencies = np.bincount(clf.predict(queries)) / float(n_queries)

    for label in (1, 2):
        assert_almost_equal(frequencies[label], 0.5, decimal=1)
    _check_predict_proba(clf, queries, labels)
Example #4
0
def test_classifier_prediction_independent_of_X(strategy):
    # Two identically configured classifiers are fitted on the same target
    # but on different constant feature values; their predictions must agree.
    targets = [0, 2, 1, 1]

    outputs = []
    for fill_value in (0, 1):
        features = [[fill_value]] * 4
        model = DummyClassifier(strategy=strategy, random_state=0,
                                constant=0)
        model.fit(features, targets)
        outputs.append(model.predict(features))

    first, second = outputs
    assert_array_equal(first, second)
Example #5
0
def test_most_frequent_and_prior_strategy_with_2d_column_y():
    # non-regression test added in
    # https://github.com/scikit-learn/scikit-learn/pull/13545
    #
    # The same labels given as a flat list and as a single-column 2-D list
    # must lead to identical predictions.
    X = [[0]] * 4
    flat_target = [1, 2, 1, 1]
    column_target = [[1], [2], [1], [1]]

    for strategy in ("most_frequent", "prior"):
        from_flat = DummyClassifier(strategy=strategy, random_state=0)
        from_column = DummyClassifier(strategy=strategy, random_state=0)

        for estimator, target in ((from_flat, flat_target),
                                  (from_column, column_target)):
            estimator.fit(X, target)

        assert_array_equal(from_flat.predict(X), from_column.predict(X))
Example #6
0
def test_constant_strategy():
    # The "constant" strategy must always predict the configured label,
    # both for integer labels and for string labels.
    X = [[0]] * 4  # ignored

    cases = (
        # (training labels, constant to predict, expected predictions)
        ([2, 1, 2, 2], 1, np.ones(len(X))),
        (['two', 'one', 'two', 'two'], 'one', np.array(['one'] * 4)),
    )

    for y, constant, expected in cases:
        clf = DummyClassifier(strategy="constant", random_state=0,
                              constant=constant)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), expected)
        _check_predict_proba(clf, X, y)
Example #7
0
def test_most_frequent_and_prior_strategy():
    # Both strategies must predict label 1 for every sample. For "prior" the
    # predicted probabilities equal the class prior; for "most_frequent" they
    # equal the indicator of the prior being above 0.5.
    X = [[0]] * 4  # ignored
    y = [1, 2, 1, 1]
    single_sample = [X[0]]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        proba = clf.predict_proba(single_sample)
        expected_proba = clf.class_prior_.reshape((1, -1))
        if strategy != "prior":
            expected_proba = expected_proba > 0.5
        assert_array_almost_equal(proba, expected_proba)
Example #8
0
def test_dtype_of_classifier_probas(strategy):
    # predict_proba must return float64 values for the given strategy.
    targets = [0, 2, 1, 1]
    features = np.zeros(4)

    clf = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    fitted = clf.fit(features, targets)
    probabilities = fitted.predict_proba(features)

    assert probabilities.dtype == np.float64
Example #9
0
def test_constant_strategy_sparse_target():
    # With a sparse two-column target, the "constant" strategy must return
    # sparse predictions equal to the configured constants [1, 0] per row.
    X = [[0]] * 5  # ignored
    dense_target = np.array([[0, 1],
                             [4, 0],
                             [1, 1],
                             [1, 4],
                             [1, 1]])
    y = sp.csc_matrix(dense_target)

    n_samples = len(X)
    ones_column = np.ones((n_samples, 1))
    zeros_column = np.zeros((n_samples, 1))

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)
    y_pred = clf.predict(X)

    assert sp.issparse(y_pred)
    assert_array_equal(y_pred.toarray(),
                       np.hstack([ones_column, zeros_column]))
Example #10
0
def test_constant_strategy_multioutput():
    # With one constant per output column, the "constant" strategy must
    # predict those constants ([1, 0]) for every sample.
    X = [[0]] * 4  # ignored
    y = np.array([[2, 3], [1, 3], [2, 3], [2, 0]])

    n_samples = len(X)
    expected = np.hstack([np.ones((n_samples, 1)),
                          np.zeros((n_samples, 1))])

    clf = DummyClassifier(strategy="constant", random_state=0,
                          constant=[1, 0])
    clf.fit(X, y)

    assert_array_equal(clf.predict(X), expected)
    _check_predict_proba(clf, X, y)
Example #11
0
def test_most_frequent_and_prior_strategy_multioutput():
    # The most frequent label is 1 in the first output column and 0 in the
    # second, so both strategies must predict [1, 0] for every sample.
    X = [[0]] * 4  # ignored
    y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])

    n_samples = len(X)
    expected = np.hstack([np.ones((n_samples, 1)),
                          np.zeros((n_samples, 1))])

    for strategy in ("prior", "most_frequent"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), expected)
        _check_predict_proba(clf, X, y)
        _check_behavior_2d(clf)
Example #12
0
def test_most_frequent_and_prior_strategy_sparse_target():
    # With a sparse two-column target whose most frequent labels are 1 and 0,
    # both strategies must return sparse predictions equal to [1, 0] per row.
    X = [[0]] * 5  # ignored
    dense_target = np.array([[1, 0],
                             [1, 3],
                             [4, 0],
                             [0, 1],
                             [1, 0]])
    y = sp.csc_matrix(dense_target)

    n_samples = len(X)
    ones_column = np.ones((n_samples, 1))
    zeros_column = np.zeros((n_samples, 1))
    y_expected = np.hstack([ones_column, zeros_column])

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        prediction = clf.predict(X)

        assert sp.issparse(prediction)
        assert_array_equal(prediction.toarray(), y_expected)
Example #13
0
def test_uniform_strategy_multioutput():
    # For every output column, the "uniform" strategy should predict labels
    # 1 and 2 roughly half of the time each (checked to one decimal place).
    y = np.array([[2, 1], [2, 2], [1, 2], [1, 1]])
    clf = DummyClassifier(strategy="uniform", random_state=0)
    clf.fit([[0]] * 4, y)  # features are ignored

    n_queries = 500
    X = [[0]] * n_queries
    y_pred = clf.predict(X)

    n_outputs = y.shape[1]
    for output_idx in range(n_outputs):
        frequencies = np.bincount(y_pred[:, output_idx]) / float(n_queries)
        for label in (1, 2):
            assert_almost_equal(frequencies[label], 0.5, decimal=1)
        # NOTE(review): the arguments do not depend on the output index, so
        # this check could probably run once after the loop - confirm against
        # the helper before moving it.
        _check_predict_proba(clf, X, y)

    _check_behavior_2d(clf)
Example #14
0
def test_stratified_strategy_sparse_target():
    # With a sparse target, "stratified" predictions must be sparse and, per
    # output column, follow the training label frequencies
    # (label 1: 3/5, label 0: 1/5, label 4: 1/5), checked to one decimal.
    dense_target = np.array([[4, 1],
                             [0, 0],
                             [1, 1],
                             [1, 4],
                             [1, 1]])
    y = sp.csc_matrix(dense_target)

    clf = DummyClassifier(strategy="stratified", random_state=0)
    clf.fit([[0]] * 5, y)  # features are ignored

    n_queries = 500
    X = [[0]] * n_queries
    sparse_pred = clf.predict(X)
    assert sp.issparse(sparse_pred)
    y_pred = sparse_pred.toarray()

    expected_frequencies = ((1, 3. / 5), (0, 1. / 5), (4, 1. / 5))
    for output_idx in range(y.shape[1]):
        observed = np.bincount(y_pred[:, output_idx]) / float(n_queries)
        for label, frequency in expected_frequencies:
            assert_almost_equal(observed[label], frequency, decimal=1)
def test_partial_dependence_pipeline():
    # Check that partial dependence supports pipelines: the result computed
    # on a scaler + classifier pipeline with raw data must match the result
    # computed on the classifier alone with pre-scaled data, once the grid
    # values are mapped back to the original feature scale.
    iris = load_iris()
    X, y = iris.data, iris.target

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    clf.fit(scaler.fit_transform(X), y)
    pipe.fit(X, y)

    feature_idx = 0
    pdp_pipe, values_pipe = partial_dependence(
        pipe, X, features=[feature_idx], grid_resolution=10)
    pdp_clf, values_clf = partial_dependence(
        clf, scaler.transform(X), features=[feature_idx], grid_resolution=10)

    assert_allclose(pdp_pipe, pdp_clf)

    # Undo the standardisation of the classifier-only grid before comparing.
    scale = scaler.scale_[feature_idx]
    mean = scaler.mean_[feature_idx]
    assert_allclose(values_pipe[0], values_clf[0] * scale + mean)
Example #16
0
class NoWeightClassifier(BaseEstimator, ClassifierMixin):
    # Classifier whose ``fit`` accepts only ``X`` and ``y``; fitting is
    # delegated to a stratified DummyClassifier stored on ``self.clf``.

    def fit(self, X, y):
        # Build the inner estimator, keep it on the instance, and return
        # whatever its ``fit`` call returns.
        inner = DummyClassifier(strategy='stratified')
        self.clf = inner
        return inner.fit(X, y)
Example #17
0
def test_string_labels():
    # "most_frequent" must predict the most common string label ("paris")
    # for every sample.
    cities = ["paris", "paris", "tokyo", "amsterdam", "berlin"]
    n_samples = len(cities)
    X = [[0]] * n_samples

    clf = DummyClassifier(strategy="most_frequent")
    clf.fit(X, cities)

    assert_array_equal(clf.predict(X), ["paris"] * n_samples)
Example #18
0
def test_classifier_score_with_None(y, y_test):
    # Fitting and scoring with ``None`` in place of the features must work,
    # and the "most_frequent" score on ``y_test`` must equal 0.5.
    model = DummyClassifier(strategy="most_frequent")
    model.fit(None, y)
    score = model.score(None, y_test)
    assert score == 0.5