Ejemplo n.º 1
0
def test_ovr_partial_fit():
    # Test if partial_fit is working as intended
    X, y = shuffle(iris.data, iris.target, random_state=0)
    ovr = OneVsRestClassifier(MultinomialNB())
    ovr.partial_fit(X[:100], y[:100], np.unique(y))
    ovr.partial_fit(X[100:], y[100:])
    pred = ovr.predict(X)
    ovr2 = OneVsRestClassifier(MultinomialNB())
    pred2 = ovr2.fit(X, y).predict(X)

    assert_almost_equal(pred, pred2)
    assert len(ovr.estimators_) == len(np.unique(y))
    assert np.mean(y == pred) > 0.65

    # Test when mini batches doesn't have all classes
    # with SGDClassifier
    X = np.abs(np.random.randn(14, 2))
    y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3]

    ovr = OneVsRestClassifier(SGDClassifier(max_iter=1, tol=None,
                                            shuffle=False, random_state=0))
    ovr.partial_fit(X[:7], y[:7], np.unique(y))
    ovr.partial_fit(X[7:], y[7:])
    pred = ovr.predict(X)
    ovr1 = OneVsRestClassifier(SGDClassifier(max_iter=1, tol=None,
                                             shuffle=False, random_state=0))
    pred1 = ovr1.fit(X, y).predict(X)
    assert np.mean(pred == y) == np.mean(pred1 == y)

    # test partial_fit only exists if estimator has it:
    ovr = OneVsRestClassifier(SVC())
    assert not hasattr(ovr, "partial_fit")
Ejemplo n.º 2
0
def test_ovr_coef_exceptions():
    # Not fitted exception!
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    # lambda is needed because we don't want coef_ to be evaluated right away
    assert_raises(ValueError, lambda x: ovr.coef_, None)

    # Doesn't have coef_ exception!
    ovr = OneVsRestClassifier(DecisionTreeClassifier())
    ovr.fit(iris.data, iris.target)
    assert_raises(AttributeError, lambda x: ovr.coef_, None)
Ejemplo n.º 3
0
def test_ovr_pipeline():
    # Test with pipeline of length one
    # This test is needed because the multiclass estimators may fail to detect
    # the presence of predict_proba or decision_function.
    clf = Pipeline([("tree", DecisionTreeClassifier())])
    ovr_pipe = OneVsRestClassifier(clf)
    ovr_pipe.fit(iris.data, iris.target)
    ovr = OneVsRestClassifier(DecisionTreeClassifier())
    ovr.fit(iris.data, iris.target)
    assert_array_equal(ovr.predict(iris.data), ovr_pipe.predict(iris.data))
Ejemplo n.º 4
0
def test_ovr_fit_predict():
    # A classifier which implements decision_function.
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes

    clf = LinearSVC(random_state=0)
    pred2 = clf.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == pred) == np.mean(iris.target == pred2)

    # A classifier which implements predict_proba.
    ovr = OneVsRestClassifier(MultinomialNB())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == pred) > 0.65
Ejemplo n.º 5
0
def test_ovr_coef_():
    for base_classifier in [SVC(kernel='linear', random_state=0),
                            LinearSVC(random_state=0)]:
        # SVC has sparse coef with sparse input data

        ovr = OneVsRestClassifier(base_classifier)
        for X in [iris.data, sp.csr_matrix(iris.data)]:
            # test with dense and sparse coef
            ovr.fit(X, iris.target)
            shape = ovr.coef_.shape
            assert shape[0] == n_classes
            assert shape[1] == iris.data.shape[1]
            # don't densify sparse coefficients
            assert (sp.issparse(ovr.estimators_[0].coef_) ==
                         sp.issparse(ovr.coef_))
Ejemplo n.º 6
0
def test_ovr_ovo_regressor():
    # test that ovr and ovo work on regressors which don't have a decision_
    # function
    ovr = OneVsRestClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > .9

    ovr = OneVsOneClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert np.mean(pred == iris.target) > .9
Ejemplo n.º 7
0
def test_ovr_multilabel_predict_proba():
    base_clf = MultinomialNB(alpha=1)
    for au in (False, True):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=3,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test = X[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

        # Decision function only estimator.
        decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
        assert not hasattr(decision_only, 'predict_proba')

        # Estimator with predict_proba disabled, depending on parameters.
        decision_only = OneVsRestClassifier(svm.SVC(probability=False))
        assert not hasattr(decision_only, 'predict_proba')
        decision_only.fit(X_train, Y_train)
        assert not hasattr(decision_only, 'predict_proba')
        assert hasattr(decision_only, 'decision_function')

        # Estimator which can get predict_proba enabled after fitting
        gs = GridSearchCV(svm.SVC(probability=False),
                          param_grid={'probability': [True]})
        proba_after_fit = OneVsRestClassifier(gs)
        assert not hasattr(proba_after_fit, 'predict_proba')
        proba_after_fit.fit(X_train, Y_train)
        assert hasattr(proba_after_fit, 'predict_proba')

        Y_pred = clf.predict(X_test)
        Y_proba = clf.predict_proba(X_test)

        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        pred = Y_proba > .5
        assert_array_equal(pred, Y_pred)
Ejemplo n.º 8
0
def test_ovr_fit_predict_svc():
    ovr = OneVsRestClassifier(svm.SVC())
    ovr.fit(iris.data, iris.target)
    assert len(ovr.estimators_) == 3
    assert ovr.score(iris.data, iris.target) > .9