Esempio n. 1
0
def test_auto_weight():
    """Check that class_weight='auto' does not lower F1 on imbalanced data.

    Each classifier is fitted twice on a thinned-out subset of iris, first
    with its current class weights and then with ``class_weight='auto'``,
    and both fits are scored on the full data set.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm.base import _get_class_weight

    # Keep only the first two iris features (the original author notes
    # this makes the problem non-separable), then drop every other sample
    # whose label is greater than 1 so that class is under-represented.
    X, y = iris.data[:, :2], iris.target
    thinned_class_idx = np.where(y > 1)[0]
    unbalanced = np.delete(np.arange(y.size), thinned_class_idx[::2])

    # The largest entry of the helper's first return value must sit at
    # index 2 (presumably the per-class weights -- confirm against the
    # helper's definition).
    auto_weights = _get_class_weight('auto', y[unbalanced])[0]
    assert_true(np.argmax(auto_weights) == 2)

    def _fit_and_predict(model):
        # Train on the unbalanced subset, predict on every sample.
        return model.fit(X[unbalanced], y[unbalanced]).predict(X)

    C = len(X)
    classifiers = (
        svm.SVC(kernel='linear', C=C),
        svm.LinearSVC(C=C),
        LogisticRegression(C=C),
    )
    for clf in classifiers:
        # Baseline fit first, then refit the same estimator with 'auto'
        # class weights; the reweighted score must not be worse.
        y_pred = _fit_and_predict(clf)
        clf.set_params(class_weight='auto')
        y_pred_balanced = _fit_and_predict(clf)

        baseline_f1 = metrics.f1_score(y, y_pred)
        balanced_f1 = metrics.f1_score(y, y_pred_balanced)
        assert_true(baseline_f1 <= balanced_f1)
Esempio n. 2
0
def test_auto_weight():
    """Check that class_weight='auto' does not lower F1 on imbalanced data.

    Each classifier is fitted twice on a thinned-out subset of iris, once
    with an empty ``class_weight`` mapping and once with
    ``class_weight="auto"`` (both passed to ``fit``), and both fits are
    scored on the full data set.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm.base import _get_class_weight

    # Keep only the first two iris features (the original author notes
    # this makes the problem non-separable), then drop every other sample
    # whose label is greater than 1 so that class is under-represented.
    X, y = iris.data[:, :2], iris.target
    thinned_class_idx = np.where(y > 1)[0]
    unbalanced = np.delete(np.arange(y.size), thinned_class_idx[::2])

    # The largest entry of the helper's first return value must sit at
    # index 2 (presumably the per-class weights -- confirm against the
    # helper's definition).
    auto_weights = _get_class_weight("auto", y[unbalanced])[0]
    assert np.argmax(auto_weights) == 2

    def _fit_and_predict(model, class_weight):
        # Train on the unbalanced subset with the given weighting, then
        # predict on every sample.
        fitted = model.fit(
            X[unbalanced],
            y[unbalanced],
            class_weight=class_weight,
        )
        return fitted.predict(X)

    classifiers = (
        svm.SVC(kernel="linear"),
        svm.LinearSVC(),
        LogisticRegression(),
    )
    for clf in classifiers:
        # The reweighted fit must score at least as well as the baseline.
        y_pred = _fit_and_predict(clf, {})
        y_pred_balanced = _fit_and_predict(clf, "auto")

        baseline_f1 = metrics.f1_score(y, y_pred)
        balanced_f1 = metrics.f1_score(y, y_pred_balanced)
        assert baseline_f1 <= balanced_f1