Example #1
0
def check_folding(classifier, check_instance=True, has_staged_pp=True, has_importances=True):
    """Fit ``classifier`` on generated classification data and sanity-check its output.

    The classifier is fitted with sample weights, passed through
    ``check_classification_model``, and then its predictions (obtained with a
    mean vote function) are checked for repeatability, accuracy (> 0.7),
    valid probabilities and ROC AUC (> 0.8).

    :param classifier: estimator under test; its ``predict``, ``predict_proba`` and
        ``staged_predict_proba`` are called with a vote function as the second
        positional argument.
    :param check_instance: forwarded to ``check_classification_model``.
    :param has_staged_pp: forwarded to ``check_classification_model``; when true,
        ``staged_predict_proba`` is additionally checked here.
    :param has_importances: forwarded to ``check_classification_model``.
    :raises AssertionError: if any of the checks fails.
    """
    X, y, sample_weight = generate_classification_data(distance=0.6)

    # fit() is expected to return the classifier itself
    assert classifier == classifier.fit(X, y, sample_weight=sample_weight)
    assert list(classifier.features) == list(X.columns)

    check_classification_model(classifier, X, y, check_instance=check_instance, has_staged_pp=has_staged_pp,
                               has_importances=has_importances)

    def mean_vote(x):
        # average along the first axis of the stacked predictions
        return numpy.mean(x, axis=0)

    labels = classifier.predict(X, mean_vote)
    proba = classifier.predict_proba(X, mean_vote)
    # a second call with the same arguments must give exactly the same result
    assert numpy.all(proba == classifier.predict_proba(X, mean_vote))

    score = accuracy_score(y, labels)
    print(score)
    assert score > 0.7
    assert numpy.allclose(proba.sum(axis=1), 1), 'probabilities do not sum to 1'
    assert numpy.all(proba >= 0.), 'negative probabilities'

    auc_score = roc_auc_score(y, proba[:, 1])
    print(auc_score)
    assert auc_score > 0.8
    if has_staged_pp:
        # Initialised up front so that an empty stage generator produces a clear
        # assertion failure instead of a NameError on the loop variable.
        p = None
        for p in classifier.staged_predict_proba(X, mean_vote):
            assert p.shape == (len(X), 2)
        assert p is not None, 'staged_predict_proba yielded no stages'
        # the last stage must coincide with the final prediction
        assert numpy.all(p == proba)
Example #2
0
def check_folding(classifier, check_instance=True, has_staged_pp=True, has_importances=True):
    """Run a battery of assertions against ``classifier`` on generated data.

    Steps: fit with sample weights, run ``check_classification_model``, then
    verify the mean-vote predictions: repeated ``predict_proba`` calls agree,
    accuracy exceeds 0.7, probabilities are non-negative and sum to one per
    row, and ROC AUC exceeds 0.8.

    :param classifier: estimator under test; ``predict``, ``predict_proba`` and
        ``staged_predict_proba`` receive a vote function as second positional
        argument.
    :param check_instance: passed through to ``check_classification_model``.
    :param has_staged_pp: passed through to ``check_classification_model``; also
        enables the ``staged_predict_proba`` checks performed here.
    :param has_importances: passed through to ``check_classification_model``.
    :raises AssertionError: when a check does not hold.
    """
    X, y, sample_weight = generate_classification_data(distance=0.6)

    # fit() must hand back the very same classifier
    assert classifier == classifier.fit(X, y, sample_weight=sample_weight)
    assert list(classifier.features) == list(X.columns)

    check_classification_model(classifier, X, y, check_instance=check_instance, has_staged_pp=has_staged_pp,
                               has_importances=has_importances)

    def mean_vote(x):
        # mean over the first axis of the stacked predictions
        return numpy.mean(x, axis=0)

    labels = classifier.predict(X, mean_vote)
    proba = classifier.predict_proba(X, mean_vote)
    # predictions have to be reproducible between calls
    assert numpy.all(proba == classifier.predict_proba(X, mean_vote))

    score = accuracy_score(y, labels)
    print(score)
    assert score > 0.7
    assert numpy.allclose(proba.sum(axis=1), 1), 'probabilities do not sum to 1'
    assert numpy.all(proba >= 0.), 'negative probabilities'

    auc_score = roc_auc_score(y, proba[:, 1])
    print(auc_score)
    assert auc_score > 0.8
    if has_staged_pp:
        # Start from None: if no stage is produced, the assertion below reports
        # it explicitly rather than failing with NameError on an unbound name.
        last_stage = None
        for last_stage in classifier.staged_predict_proba(X, mean_vote):
            assert last_stage.shape == (len(X), 2)
        assert last_stage is not None, 'staged_predict_proba yielded no stages'
        # checking that the last stage coincides with the final prediction
        assert numpy.all(last_stage == proba)
Example #3
0
def check_grid(estimator, check_instance=True, has_staged_pp=True, has_importances=True, use_weights=False,
               classification=True):
    """Fit a grid-search style ``estimator`` and validate its best estimator.

    Data is generated for classification or regression depending on
    ``classification``. ``estimator`` is fitted (with sample weights only when
    ``use_weights`` is true), then ``fit_best_estimator`` is called with the
    same arguments and its result is passed to the matching model checker.

    :param estimator: object providing ``fit`` and ``fit_best_estimator``.
    :param check_instance: forwarded to the model checker.
    :param has_staged_pp: forwarded as ``has_staged_pp`` (classification) or
        ``has_stages`` (regression).
    :param has_importances: forwarded to the model checker.
    :param use_weights: whether ``sample_weight`` is passed to the fit calls.
    :param classification: selects classification vs. regression data and checks.
    :return: the object returned by ``fit_best_estimator``.
    """
    generate_data = generate_classification_data if classification else generate_regression_data
    X, y, sample_weight = generate_data()
    assert len(sample_weight) == len(X), 'somehow lengths are different'

    # weights are passed by keyword only when requested; otherwise nothing extra is sent
    fit_kwargs = {'sample_weight': sample_weight} if use_weights else {}
    assert estimator == estimator.fit(X, y, **fit_kwargs)
    estimator = estimator.fit_best_estimator(X, y, **fit_kwargs)

    if not classification:
        check_regression_model(estimator, X, y, check_instance=check_instance, has_stages=has_staged_pp,
                               has_importances=has_importances)
        return estimator

    check_classification_model(estimator, X, y, check_instance=check_instance, has_staged_pp=has_staged_pp,
                               has_importances=has_importances)
    return estimator
Example #4
0
def check_grid(estimator,
               check_instance=True,
               has_staged_pp=True,
               has_importances=True,
               use_weights=False,
               classification=True):
    """Fit ``estimator``, refit its best estimator and run the model checks on it.

    :param estimator: object providing ``fit`` and ``fit_best_estimator``.
    :param check_instance: passed through to the model checker.
    :param has_staged_pp: passed as ``has_staged_pp`` to the classification
        checker or as ``has_stages`` to the regression checker.
    :param has_importances: passed through to the model checker.
    :param use_weights: if true, ``sample_weight`` is given to both fit calls.
    :param classification: if true, classification data and checks are used;
        otherwise regression data and checks.
    :return: the result of ``fit_best_estimator``.
    """
    X, y, sample_weight = (generate_classification_data() if classification
                           else generate_regression_data())
    assert len(sample_weight) == len(X), 'somehow lengths are different'

    if not use_weights:
        assert estimator == estimator.fit(X, y)
        best = estimator.fit_best_estimator(X, y)
    else:
        assert estimator == estimator.fit(X, y, sample_weight=sample_weight)
        best = estimator.fit_best_estimator(X, y, sample_weight=sample_weight)

    # the two checkers use different keyword names for the staged-prediction flag
    if classification:
        check_classification_model(
            best, X, y,
            check_instance=check_instance, has_staged_pp=has_staged_pp, has_importances=has_importances)
    else:
        check_regression_model(
            best, X, y,
            check_instance=check_instance, has_stages=has_staged_pp, has_importances=has_importances)

    return best