def test_num_parallel_tree(self):
    self._init_ray()

    # NOTE: sklearn.datasets.load_boston was removed in scikit-learn 1.2;
    # these tests require an older scikit-learn.
    from sklearn.datasets import load_boston

    reg = RayXGBRegressor(
        n_estimators=4, num_parallel_tree=4, tree_method="hist")
    boston = load_boston()
    bst = reg.fit(X=boston["data"], y=boston["target"])
    dump = bst.get_booster().get_dump(dump_format="json")
    assert len(dump) == 16

    if XGBOOST_LOOSE_VERSION != LooseVersion("0.90"):
        reg = RayXGBRFRegressor(n_estimators=4)
        bst = reg.fit(X=boston["data"], y=boston["target"])
        dump = bst.get_booster().get_dump(dump_format="json")
        assert len(dump) == 4

        # XGBoost 1.6 moved num_parallel_tree from gbtree_train_param
        # to gbtree_model_param in the saved config.
        if XGBOOST_LOOSE_VERSION >= LooseVersion("1.6.0"):
            config = json.loads(bst.get_booster().save_config())
            assert (int(config["learner"]["gradient_booster"][
                "gbtree_model_param"]["num_parallel_tree"]) == 4)
        else:
            config = json.loads(bst.get_booster().save_config())
            assert (int(config["learner"]["gradient_booster"][
                "gbtree_train_param"]["num_parallel_tree"]) == 4)
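# Sketch of the tree-count arithmetic behind the assertions above:
# get_dump() returns one entry per individual tree, and gradient boosting
# grows num_parallel_tree trees per boosting round, so 4 estimators x 4
# parallel trees = 16 dump entries. For RayXGBRFRegressor, n_estimators
# maps to the forest size (num_parallel_tree) within a single boosting
# round, hence 4 entries. The helper below is a hypothetical illustration,
# not used by the tests.
def expected_tree_count(n_estimators, num_parallel_tree=1):
    # One boosting round per estimator, num_parallel_tree trees per round.
    return n_estimators * num_parallel_tree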
def test_constraint_parameters(self):
    self._init_ray()

    reg = RayXGBRegressor(interaction_constraints="[[0, 1], [2, 3, 4]]")
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    reg.fit(X, y)

    config = json.loads(reg.get_booster().save_config())
    if XGBOOST_LOOSE_VERSION >= LooseVersion("1.6.0"):
        assert (config["learner"]["gradient_booster"]["updater"][
            "grow_histmaker"]["train_param"]["interaction_constraints"] ==
                "[[0, 1], [2, 3, 4]]")
    else:
        assert (config["learner"]["gradient_booster"]["updater"]["prune"][
            "train_param"]["interaction_constraints"] ==
                "[[0, 1], [2, 3, 4]]")
def test_regression_with_custom_objective(self):
    self._init_ray()

    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
        grad = y_pred - y_true
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    y = boston["target"]
    X = boston["data"]
    kf = KFold(n_splits=2, shuffle=True, random_state=self.rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = RayXGBRegressor(objective=objective_ls).fit(
            X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        assert mean_squared_error(preds, labels) < 25

    # Test that the custom objective function is actually used
    class XGBCustomObjectiveException(Exception):
        pass

    def dummy_objective(y_true, y_pred):
        raise XGBCustomObjectiveException()

    xgb_model = RayXGBRegressor(objective=dummy_objective)
    # TODO figure out how to assertRaises XGBCustomObjectiveException
    with self.assertRaises(RuntimeError):
        xgb_model.fit(X, y)
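# Convention followed by objective_ls above: a custom objective returns the
# first and second derivatives (grad, hess) of the loss with respect to the
# raw prediction. For squared error L(y, yhat) = 0.5 * (yhat - y) ** 2 that
# is grad = yhat - y and hess = 1 for every row, which is exactly what
# objective_ls computes.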
def test_boston_housing_regression(self):
    self._init_ray()

    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    boston = load_boston()
    y = boston["target"]
    X = boston["data"]
    kf = KFold(n_splits=2, shuffle=True, random_state=self.rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = RayXGBRegressor().fit(X[train_index], y[train_index])

        preds = xgb_model.predict(X[test_index])
        # test other params in XGBRegressor().fit
        preds2 = xgb_model.predict(
            X[test_index], output_margin=True, ntree_limit=3)
        preds3 = xgb_model.predict(
            X[test_index], output_margin=True, ntree_limit=0)
        preds4 = xgb_model.predict(
            X[test_index], output_margin=False, ntree_limit=3)
        labels = y[test_index]

        assert mean_squared_error(preds, labels) < 25
        # Predictions limited to the first 3 trees (ntree_limit=3) are
        # coarser, hence the looser bound; ntree_limit=0 means "use all
        # trees".
        assert mean_squared_error(preds2, labels) < 350
        assert mean_squared_error(preds3, labels) < 25
        assert mean_squared_error(preds4, labels) < 350
def test_estimator_type(self):
    self._init_ray()

    assert RayXGBClassifier._estimator_type == "classifier"
    assert RayXGBRFClassifier._estimator_type == "classifier"
    assert RayXGBRegressor._estimator_type == "regressor"
    assert RayXGBRFRegressor._estimator_type == "regressor"
    assert RayXGBRanker._estimator_type == "ranker"

    from sklearn.datasets import load_digits

    X, y = load_digits(n_class=2, return_X_y=True)
    cls = RayXGBClassifier(n_estimators=2).fit(X, y)

    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "cls.json")
        cls.save_model(path)

        # Loading a classifier checkpoint into a regressor must fail.
        reg = RayXGBRegressor()
        with self.assertRaises(TypeError):
            reg.load_model(path)

        cls = RayXGBClassifier()
        cls.load_model(path)  # no error
def test_parameter_tuning(self):
    self._init_ray()

    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import load_boston

    boston = load_boston()
    y = boston["target"]
    X = boston["data"]
    xgb_model = RayXGBRegressor(learning_rate=0.1)
    clf = GridSearchCV(
        xgb_model,
        {
            "max_depth": [2, 4, 6],
            "n_estimators": [50, 100, 200],
        },
        cv=3,
        verbose=1,
    )
    clf.fit(X, y)
    # GridSearchCV scores regressors with R^2 by default.
    assert clf.best_score_ < 0.7
    assert clf.best_params_ == {"n_estimators": 100, "max_depth": 4}
def test_stacking_regression(self):
    self._init_ray()

    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import RidgeCV
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.ensemble import StackingRegressor

    X, y = load_diabetes(return_X_y=True)
    estimators = [
        ("gbm", RayXGBRegressor(objective="reg:squarederror")),
        ("lr", RidgeCV()),
    ]
    reg = StackingRegressor(
        estimators=estimators,
        final_estimator=RandomForestRegressor(
            n_estimators=10, random_state=42),
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42)
    reg.fit(X_train, y_train).score(X_test, y_test)
def testRegressor(self):
    self._init_ray()

    from sklearn.datasets import load_boston

    boston = load_boston()
    y = boston["target"]
    X = boston["data"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5)

    train_matrix = RayDMatrix(X_train, y_train)
    test_matrix = RayDMatrix(X_test, y_test)

    with self.assertRaisesRegex(Exception, r"must be \(RayDMatrix, str\)"):
        RayXGBRegressor(**self.params).fit(
            train_matrix, None, eval_set=[(X_test, y_test)])

    with self.assertRaisesRegex(Exception,
                                r"must be \(array_like, array_like\)"):
        RayXGBRegressor(**self.params).fit(
            X_train, y_train, eval_set=[(test_matrix, "eval")])

    RayXGBRegressor(**self.params).fit(train_matrix, None)

    reg = RayXGBRegressor(**self.params).fit(
        train_matrix, None, eval_set=[(test_matrix, "eval")])
    reg.predict(test_matrix)
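# The assertions in testRegressor encode the eval_set contract of the
# sklearn API: when the training data is a RayDMatrix, y must be None and
# every eval_set entry must be a (RayDMatrix, str) pair; with plain arrays,
# every entry must be an (array_like, array_like) pair. The two valid call
# shapes, reusing the variables from the test above:
#
#     RayXGBRegressor(**self.params).fit(
#         train_matrix, None, eval_set=[(test_matrix, "eval")])
#     RayXGBRegressor(**self.params).fit(
#         X_train, y_train, eval_set=[(X_test, y_test)])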