def test_microgbt_train_predict():
    """Train on the data from `load_titanic()` and bound-check predictions.

    Every prediction made for a validation row, using the model's best
    iteration, must fall inside the closed interval [0, 1].
    """
    max_rounds = 100
    patience = 10
    splits = load_titanic()
    X_train, X_valid, y_train, y_valid = splits

    # Fit the model, using the validation split for early stopping.
    model = microgbtpy.GBT(params)
    model.train(X_train, y_train, X_valid, y_valid, max_rounds, patience)

    # Lazily score one validation row at a time so a failing assertion
    # stops further predictions, exactly like an explicit loop would.
    scores = (model.predict(row, model.best_iteration()) for row in X_valid)
    for score in scores:
        assert 0 <= score <= 1
def test_microgbt_boston_rmse():
    """Train on the data from `_load_boston()` and check the validation RMSE.

    The model is trained with early stopping against the validation split,
    each validation row is predicted with the best iteration, and the root
    mean squared error of those predictions must be below 10.0.
    """
    num_iters = 100
    early_stopping_rounds = 10
    X_train, X_valid, y_train, y_valid = _load_boston()

    # Train
    gbt = microgbtpy.GBT(params)
    gbt.train(X_train, y_train, X_valid, y_valid, num_iters, early_stopping_rounds)

    # Predict
    y_valid_preds = [gbt.predict(x, gbt.best_iteration()) for x in X_valid]

    # Take the square root of the plain MSE instead of passing
    # `squared=False`: that keyword was deprecated in scikit-learn 1.4 and
    # later removed, so relying on it breaks on current releases.
    rmse = mean_squared_error(y_valid, y_valid_preds) ** 0.5
    assert rmse < 10.0
def test_microgbt_titanic_roc():
    """Train on the data from `load_titanic()` and check validation ROC AUC.

    Predictions for the validation rows, made with the model's best
    iteration, must give an area under the ROC curve greater than 0.7.
    """
    max_rounds = 100
    patience = 10
    X_train, X_valid, y_train, y_valid = load_titanic()

    # Fit the model, using the validation split for early stopping.
    model = microgbtpy.GBT(params)
    model.train(X_train, y_train, X_valid, y_valid, max_rounds, patience)

    # Score every validation row with the best iteration found in training.
    valid_scores = [model.predict(row, model.best_iteration()) for row in X_valid]

    auc = roc_auc_score(y_valid, valid_scores)
    assert auc > 0.7, "Area under the curve must be greater than 0.7"
def gbt():
    """Build and return a new `microgbtpy.GBT` configured with `params`."""
    model = microgbtpy.GBT(params)
    return model
# Report the shapes of the validation and test splits.
for _template, _split in (
    ("Input test valid dimensions {}", X_valid),
    ("Target valid dims: {}", y_valid),
    ("Input test dataset dimensions {}", X_test),
    ("Target test dims: {}", y_test),
):
    print(_template.format(_split.shape))

# Copied from https://github.com/microsoft/LightGBM/blob/master/examples/regression/train.conf
params = {
    "gamma": 0.1,
    "lambda": 1.0,
    "max_depth": 4.0,
    "shrinkage_rate": 1.0,
    "min_split_gain": 0.1,
    "learning_rate": 0.05,
    "min_tree_size": 100,
    "metric": 1.0,
}

# Build the model from the parameters above and show its printed form.
gbt = microgbtpy.GBT(params)
print(gbt)

# Training budget: iteration count and early-stopping rounds passed to train().
num_iters = 5
early_stopping_rounds = 5
gbt.train(X_train, y_train, X_valid, y_valid, num_iters, early_stopping_rounds)

logger.info("Best iteration {}".format(gbt.best_iteration()))

# Run regression_metrics on each split in turn, labelled by split name.
for _features, _targets, _split_name in (
    (X_train, y_train, "Training"),
    (X_valid, y_valid, "Validation"),
    (X_test, y_test, "Testing"),
):
    regression_metrics(gbt, _features, _targets, _split_name)