Ejemplo n.º 1
0
def test__GroupProbaScorer():
    """Smoke-test ``_GroupProbaScorer`` built around ``max_proba_group_accuracy``.

    A logistic regression is fitted on seeded random data whose 100 rows are
    split into four consecutive groups of 25. The scorer must return a
    non-null float in ``[0, 1]`` when groups are supplied, and raise
    ``TypeError`` when they are left out.
    """
    np.random.seed(123)
    n_groups, group_size = 4, 25
    n_samples = n_groups * group_size

    # Draw order is kept (features first, then label noise) so the seeded
    # data stays the same.
    X = np.random.randn(n_samples, 10)
    label_noise = np.random.randn(n_samples)
    y = 1 * (label_noise > 0)
    groups = np.repeat(np.arange(n_groups), group_size)

    model = LogisticRegression(solver="lbfgs", random_state=123)
    model.fit(X, y)

    scorer = _GroupProbaScorer(
        score_func=max_proba_group_accuracy,
        sign=1,
        kwargs={},
    )

    score = scorer(model, X, y, groups)

    assert isinstance(score, float)
    assert not pd.isnull(score)
    assert 0 <= score <= 1

    # Omitting the groups argument must be rejected.
    with pytest.raises(TypeError):
        scorer(model, X, y)
Ejemplo n.º 2
0
def test_log_loss_patched_multioutput():
    """Check ``log_loss_scorer_patched`` on a classifier with two outputs.

    A random forest is fitted on seeded random features against a
    two-column string target. The scorer result must be a float and must
    match the reference value computed here: minus the average of the
    ``log_loss`` of each output column.
    """
    np.random.seed(123)
    X = np.random.randn(100, 2)

    # Two targets with different, unbalanced class sets.
    y1 = np.array(["AA"] * 33 + ["BB"] * 33 + ["CC"] * 33 + ["DD"])
    y2 = np.array(["aaa"] * 50 + ["bbb"] * 40 + ["ccc"] * 9 + ["ddd"])
    y2d = np.concatenate((y1[:, np.newaxis], y2[:, np.newaxis]), axis=1)

    clf = RandomForestClassifier(n_estimators=10, random_state=123)
    clf.fit(X, y2d)

    scorer = log_loss_scorer_patched()

    s = scorer(clf, X, y2d)
    assert isinstance(s, float)  # verify that the scorer works

    # predict_proba is indexed per output here: one entry per target column.
    y_pred = clf.predict_proba(X)
    s2 = (-0.5 * log_loss(y2d[:, 0], y_pred[0])
          - 0.5 * log_loss(y2d[:, 1], y_pred[1]))

    # Compare with a tight tolerance rather than ``==``: the scorer may
    # combine the per-output losses in a different order, and exact float
    # equality would then fail on rounding alone.
    assert np.isclose(s, s2, rtol=1e-9, atol=1e-12)