def test_gb_ranking(n_samples=1000):
    """
    Check RankBoostLossFunction inside gradient boosting for several
    update_iterations settings (1, 2 and 10).

    A column of random integers from {0, 1, 2} is added to both samples and
    passed to the loss as request_column; each fitted regressor must reach
    ROC AUC >= 0.8 on the separately generated test sample.
    """
    separation = 0.6
    testX, testY = generate_sample(n_samples, 10, separation)
    trainX, trainY = generate_sample(n_samples, 10, separation)

    query_column = 'column1'
    # Train is filled before test so the random draws happen in a fixed order.
    for frame in (trainX, testX):
        frame[query_column] = numpy.random.randint(0, 3, size=len(frame))

    ranking_losses = [
        losses.RankBoostLossFunction(request_column=query_column,
                                     update_iterations=n_updates)
        for n_updates in (1, 2, 10)
    ]

    for loss in ranking_losses:
        booster = UGradientBoostingRegressor(
            loss=loss,
            min_samples_split=20,
            max_depth=5,
            learning_rate=0.2,
            subsample=0.7,
            n_estimators=25,
            train_features=None,
        )
        clf = booster.fit(trainX[:n_samples], trainY[:n_samples])
        result = roc_auc_score(testY, clf.predict(testX))
        assert result >= 0.8, "The quality is too poor: {} with loss: {}".format(
            result, loss)
def test_gb_regression(n_samples=1000):
    """
    Check that boosting with MSELossFunction fits a smooth target.

    The target is tanh of the row-wise feature sum. The training-set MSE of
    the fitted model must be below half the MSE of an all-zero prediction.
    """
    features, _ = generate_sample(n_samples, 10, distance=0.6)
    target = numpy.tanh(features.sum(axis=1))

    regressor = UGradientBoostingRegressor(loss=MSELossFunction())
    regressor.fit(features, target)
    predictions = regressor.predict(features)

    # Reference level: half of the error made by predicting zero everywhere.
    baseline = 0.5 * mean_squared_error(target, target * 0.)
    fitted_error = mean_squared_error(target, predictions)
    assert fitted_error < baseline, 'something wrong with regression quality'
Esempio n. 3
0
def test_gb_regression(n_samples=1000):
    """
    Regression sanity check for UGradientBoostingRegressor with MSE loss.

    Fits tanh(sum of features) and requires the in-sample MSE to be less
    than 0.5 * MSE of the constant-zero predictor.
    """
    data, _ = generate_sample(n_samples, 10, distance=0.6)
    response = numpy.tanh(data.sum(axis=1))

    model = UGradientBoostingRegressor(loss=MSELossFunction())
    model.fit(data, response)
    fitted = model.predict(data)

    zero_prediction = response * 0.
    threshold = 0.5 * mean_squared_error(response, zero_prediction)
    assert mean_squared_error(response, fitted) < threshold, \
        'something wrong with regression quality'
def test_constant_fitting(n_samples=1000, n_features=5):
    """
    Testing if initial constant fitted properly

    The labels are cast to float and shifted by 1000, then a regressor with
    only 10 estimators is trained for each of the MSE and MAE losses. The
    in-sample MSE must stay below 0.5 (presumably reachable only when the
    initial constant absorbs the large offset).

    :param n_samples: number of samples to generate
    :param n_features: number of features to generate
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features)
    # `numpy.float` was a deprecated alias of the builtin `float` and was
    # removed in NumPy 1.24; the builtin gives the same float64 result.
    y = y.astype(float) + 1000.
    for loss in [MSELossFunction(), losses.MAELossFunction()]:
        gb = UGradientBoostingRegressor(loss=loss, n_estimators=10)
        gb.fit(X, y)
        p = gb.predict(X)
        assert mean_squared_error(p, y) < 0.5
def test_constant_fitting(n_samples=1000, n_features=5):
    """
    Testing if initial constant fitted properly

    Targets are the generated labels converted to float plus 1000. For both
    MSELossFunction and MAELossFunction a 10-estimator regressor is fitted
    and its training-set mean squared error is required to be below 0.5.

    :param n_samples: number of samples to generate
    :param n_features: number of features to generate
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features)
    # Builtin `float` replaces the `numpy.float` alias (deprecated in
    # NumPy 1.20, removed in 1.24); the resulting dtype is unchanged.
    y = y.astype(float) + 1000.
    for loss in [MSELossFunction(), losses.MAELossFunction()]:
        gb = UGradientBoostingRegressor(loss=loss, n_estimators=10)
        gb.fit(X, y)
        p = gb.predict(X)
        assert mean_squared_error(p, y) < 0.5
Esempio n. 6
0
def test_gb_ranking(n_samples=1000):
    """
    Train UGradientBoostingRegressor with RankBoostLossFunction using
    update_iterations of 1, 2 and 10, and require ROC AUC >= 0.8 on a
    separately generated test sample.

    The column given as request_column holds random integers from {0, 1, 2}.
    """
    distance = 0.6
    testX, testY = generate_sample(n_samples, 10, distance)
    trainX, trainY = generate_sample(n_samples, 10, distance)

    request_name = 'column1'
    # Train column is drawn first, then test, keeping the random draw order fixed.
    trainX[request_name] = numpy.random.randint(0, 3, size=len(trainX))
    testX[request_name] = numpy.random.randint(0, 3, size=len(testX))

    candidate_losses = [
        RankBoostLossFunction(request_column=request_name, update_iterations=iterations)
        for iterations in (1, 2, 10)
    ]

    boosting_options = dict(min_samples_split=20, max_depth=5, learning_rate=0.2,
                            subsample=0.7, n_estimators=25, train_features=None)

    for loss in candidate_losses:
        regressor = UGradientBoostingRegressor(loss=loss, **boosting_options)
        clf = regressor.fit(trainX[:n_samples], trainY[:n_samples])
        scores = clf.predict(testX)
        result = roc_auc_score(testY, scores)
        assert result >= 0.8, "The quality is too poor: {} with loss: {}".format(result, loss)