Example #1
import numpy

# generate_sample, the loss functions and uGradientBoostingClassifier are
# assumed to be imported from the library under test; a hypothetical sketch
# of generate_sample is given after this example.

def test_gradient_boosting(n_samples=1000):
    """
    Test that gradient boosting works with each of the available loss functions.
    """
    # generate train and test samples correlated with the first variable
    distance = 0.6
    testX, testY = generate_sample(n_samples, 10, distance)
    trainX, trainY = generate_sample(n_samples, 10, distance)
    # the flatness losses will try to keep predictions uniform along this variable
    uniform_variables = ['column0']
    rank_variable = 'column1'
    # add a random request/group column used by the ranking losses
    trainX[rank_variable] = numpy.random.randint(0, 3, size=len(trainX))
    testX[rank_variable] = numpy.random.randint(0, 3, size=len(testX))

    loss1 = BinomialDevianceLossFunction()
    loss2 = AdaLossFunction()
    loss3 = CompositeLossFunction()
    loss4 = SimpleKnnLossFunction(uniform_variables=uniform_variables)
    loss5 = RankBoostLossFunction(request_column=rank_variable)
    loss51 = RankBoostLossFunction(request_column=rank_variable, update_iterations=2)
    loss52 = RankBoostLossFunction(request_column=rank_variable, update_iterations=10)
    loss6bin = BinFlatnessLossFunction(uniform_variables, ada_coefficient=0.5)
    loss7bin = BinFlatnessLossFunction(uniform_variables, ada_coefficient=0.5, uniform_label=[0, 1])
    loss6knn = KnnFlatnessLossFunction(uniform_variables, ada_coefficient=0.5)
    loss7knn = KnnFlatnessLossFunction(uniform_variables, ada_coefficient=0.5, uniform_label=[0, 1])

    for loss in [loss5, loss51, loss52, loss1, loss2, loss3, loss4, loss6bin, loss7bin, loss6knn, loss7knn]:
        clf = uGradientBoostingClassifier(loss=loss, min_samples_split=20, max_depth=5, learning_rate=0.2,
                                          subsample=0.7, n_estimators=25, train_variables=None) \
            .fit(trainX[:n_samples], trainY[:n_samples])
        result = clf.score(testX, testY)
        assert result >= 0.7, "The quality is too poor: {} with loss: {}".format(result, loss)
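
Both examples rely on a generate_sample helper from the surrounding test utilities. As a rough, hypothetical sketch of its assumed contract (a pandas DataFrame with columns named column0, column1, ... plus binary labels, with the two classes separated by distance), something like the following would make the tests runnable; the library's actual implementation may differ:

import numpy
import pandas

def generate_sample(n_samples, n_features, distance):
    # Hypothetical stand-in, not the library implementation:
    # two overlapping Gaussian classes whose means differ by `distance`.
    y = numpy.random.randint(0, 2, size=n_samples)
    X = numpy.random.normal(size=(n_samples, n_features)) + distance * y[:, numpy.newaxis]
    columns = ['column' + str(i) for i in range(n_features)]
    return pandas.DataFrame(X, columns=columns), y

With this sketch, trainX and testX carry the column0/column1 names that the uniformity and ranking settings above refer to.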
Example #2
def test_gb_with_ada(n_samples=1000, n_features=10, distance=0.6):
    testX, testY = generate_sample(n_samples, n_features, distance=distance)
    trainX, trainY = generate_sample(n_samples, n_features, distance=distance)
    loss = BinomialDevianceLossFunction()
    clf = uGradientBoostingClassifier(loss=loss, min_samples_split=20, max_depth=5, learning_rate=0.2,
                                      subsample=0.7, n_estimators=10, train_variables=None)
    clf.fit(trainX, trainY)
    assert clf.n_features == n_features
    assert len(clf.feature_importances_) == n_features
    # check that staged prediction works: each stage yields class probabilities,
    # and the last stage must agree with the final predict_proba output
    for p in clf.staged_predict_proba(testX):
        assert p.shape == (n_samples, 2)
    assert numpy.all(p == clf.predict_proba(testX))
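
The last assertion uses the fact that staged_predict_proba yields one probability matrix per boosting stage and that the final stage coincides with predict_proba. One common use of this staged interface, sketched here with a hypothetical helper that is not part of the test, is tracking test accuracy as trees are added, e.g. to choose n_estimators:

import numpy

def staged_accuracy(clf, X, y):
    # Hypothetical helper: accuracy after each boosting stage of a fitted
    # classifier, assuming classes are labeled 0 and 1 as in generate_sample.
    scores = []
    for proba in clf.staged_predict_proba(X):
        predictions = numpy.argmax(proba, axis=1)    # pick the more probable class
        scores.append(numpy.mean(predictions == y))  # fraction of correct labels
    return scores

Called as staged_accuracy(clf, testX, testY), this returns one accuracy value per estimator; a curve that flattens early suggests fewer trees would suffice.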