예제 #1
0
def test_microgbt_train_predict():
    """Train a GBT on the Titanic split and bound every validation prediction.

    The model is built from the ``params`` mapping of the enclosing scope,
    trained for at most 100 iterations with 10 early-stopping rounds, and each
    per-row prediction on the validation set must fall inside ``[0, 1]``.
    """
    train_x, valid_x, train_y, valid_y = load_titanic()

    max_rounds = 100
    patience = 10

    # Fit the model, passing the validation split along to ``train``.
    model = microgbtpy.GBT(params)
    model.train(train_x, train_y, valid_x, valid_y, max_rounds, patience)

    # Score the validation rows one at a time using the best iteration.
    for row in valid_x:
        score = model.predict(row, model.best_iteration())
        assert 0 <= score <= 1
예제 #2
0
def test_microgbt_boston_rmse():
    """Train a GBT on the Boston split and require a validation RMSE below 10.0.

    The model is built from the ``params`` mapping of the enclosing scope and
    trained for at most 100 iterations with 10 early-stopping rounds.

    The RMSE is taken as the square root of ``mean_squared_error`` instead of
    passing ``squared=False``: assuming this is scikit-learn's
    ``mean_squared_error``, that keyword was deprecated in 1.4 and removed in
    1.6, so the original call raises ``TypeError`` on current releases. The
    square-root form gives the same value on every scikit-learn version.
    """
    num_iters = 100
    early_stopping_rounds = 10

    X_train, X_valid, y_train, y_valid = _load_boston()

    # Train
    gbt = microgbtpy.GBT(params)
    gbt.train(X_train, y_train, X_valid, y_valid, num_iters,
              early_stopping_rounds)

    # Predict row by row; the best iteration is fixed once training is done,
    # so look it up a single time instead of once per row.
    best_iteration = gbt.best_iteration()
    y_valid_preds = [gbt.predict(x, best_iteration) for x in X_valid]

    rmse = mean_squared_error(y_valid, y_valid_preds) ** 0.5
    assert rmse < 10.0
예제 #3
0
def test_microgbt_titanic_roc():
    """Train a GBT on the Titanic split and require a validation ROC AUC above 0.7.

    The model is built from the ``params`` mapping of the enclosing scope and
    trained for at most 100 iterations with 10 early-stopping rounds.
    """
    max_rounds, patience = 100, 10

    train_x, valid_x, train_y, valid_y = load_titanic()

    # Fit the model, passing the validation split along to ``train``.
    model = microgbtpy.GBT(params)
    model.train(train_x, train_y, valid_x, valid_y, max_rounds, patience)

    # Collect one prediction per validation row, using the best iteration.
    scores = [model.predict(row, model.best_iteration()) for row in valid_x]

    auc = roc_auc_score(valid_y, scores)
    assert auc > 0.7, "Area under the curve must be greater than 0.7"
예제 #4
0
def gbt():
    """Return a new ``microgbtpy.GBT`` built from the enclosing scope's ``params``.

    NOTE(review): this looks like a pytest fixture whose decorator is outside
    the visible snippet -- confirm against the full file.
    """
    model = microgbtpy.GBT(params)
    return model
예제 #5
0
# Report the shapes of the validation and test splits before training.
for _template, _array in (
    ("Input test valid dimensions {}", X_valid),
    ("Target valid dims: {}", y_valid),
    ("Input test dataset dimensions {}", X_test),
    ("Target test dims: {}", y_test),
):
    print(_template.format(_array.shape))

# Hyperparameters handed to microgbtpy.GBT.
# Copied from https://github.com/microsoft/LightGBM/blob/master/examples/regression/train.conf
# NOTE(review): the meaning of each key (e.g. which metric the code 1.0
# selects) is defined by microgbtpy and is not visible here -- confirm there.
params = {
    "gamma": 0.1,
    "lambda": 1.0,
    "max_depth": 4.0,
    "shrinkage_rate": 1.0,
    "min_split_gain": 0.1,
    "learning_rate": 0.05,
    "min_tree_size": 100,
    "metric": 1.0
}

# Build the model and print it.
gbt = microgbtpy.GBT(params)
print(gbt)

# Train with the validation split supplied alongside the training split.
num_iters, early_stopping_rounds = 5, 5
gbt.train(X_train, y_train, X_valid, y_valid, num_iters, early_stopping_rounds)

logger.info("Best iteration {}".format(gbt.best_iteration()))

# Run the regression metrics helper on every split, in train/valid/test order.
for _features, _targets, _split_name in (
    (X_train, y_train, "Training"),
    (X_valid, y_valid, "Validation"),
    (X_test, y_test, "Testing"),
):
    regression_metrics(gbt, _features, _targets, _split_name)