def test_auto_weight():
    """Test that class_weight='auto' improves f1-score on imbalanced data.

    Fits SVC, LinearSVC and LogisticRegression on an artificially
    unbalanced subsample of iris and checks that enabling automatic
    class weighting does not degrade the f1-score on the full set.

    NOTE(review): this file contains a second definition of
    ``test_auto_weight`` (older ``fit(..., class_weight=...)`` API);
    only one of the two survives at import time — confirm which is
    intended.  Relies on module-level ``iris``, ``np``, ``svm``,
    ``metrics`` and ``assert_true``.
    """
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of the predictors from
    # class 1.
    from sklearn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])
    # Class 2 kept all its samples, so it must get the largest weight.
    assert_true(np.argmax(_get_class_weight('auto', y[unbalanced])[0]) == 2)

    C = len(X)
    for clf in (svm.SVC(kernel='linear', C=C), svm.LinearSVC(C=C),
                LogisticRegression(C=C)):
        # Check that the score is better when class_weight='auto' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='auto')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        assert_true(metrics.f1_score(y, y_pred)
                    <= metrics.f1_score(y, y_pred_balanced))
def test_auto_weight():
    """Test that class_weight='auto' improves f1-score on imbalanced data.

    Fits SVC, LinearSVC and LogisticRegression on an artificially
    unbalanced subsample of iris (via the deprecated
    ``fit(..., class_weight=...)`` calling convention) and checks that
    automatic class weighting does not degrade the f1-score.

    NOTE(review): this duplicates the ``test_auto_weight`` defined
    earlier in the file (newer ``set_params(class_weight=...)`` API);
    only one of the two survives at import time — confirm which is
    intended.  Relies on module-level ``iris``, ``np``, ``svm`` and
    ``metrics``.
    """
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of the predictors from
    # class 1.
    from sklearn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])
    # Class 2 kept all its samples, so it must get the largest weight.
    assert np.argmax(_get_class_weight("auto", y[unbalanced])[0]) == 2
    for clf in (svm.SVC(kernel="linear"), svm.LinearSVC(),
                LogisticRegression()):
        # Check that the score is better when class_weight='auto' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced],
                         class_weight={}).predict(X)
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],
                                  class_weight="auto").predict(X)
        assert (metrics.f1_score(y, y_pred)
                <= metrics.f1_score(y, y_pred_balanced))