def test_parameters_access(self):
    self._init_ray()

    from sklearn import datasets

    params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
    clf = RayXGBClassifier(n_estimators=1000, **params)
    assert clf.get_params()["updater"] == "grow_gpu_hist"
    assert clf.get_params()["subsample"] == 0.5
    assert clf.get_params()["n_estimators"] == 1000

    clf = RayXGBClassifier(n_estimators=1, nthread=4)
    X, y = datasets.load_iris(return_X_y=True)
    clf.fit(X, y)

    config = json.loads(clf.get_booster().save_config())
    assert int(config["learner"]["generic_param"]["nthread"]) == 4

    clf.set_params(nthread=16)
    config = json.loads(clf.get_booster().save_config())
    assert int(config["learner"]["generic_param"]["nthread"]) == 16

    # Prediction should not mutate the booster configuration.
    clf.predict(X)
    config = json.loads(clf.get_booster().save_config())
    assert int(config["learner"]["generic_param"]["nthread"]) == 16
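# For reference, save_config() serializes the booster configuration as a
# JSON string; the nthread assertions above walk a subtree shaped roughly
# like this (a sketch of the relevant keys only, not the full document):
#
#   {"learner": {"generic_param": {"nthread": "16", ...}, ...}, ...}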
def test_multiclass_classification(self):
    self._init_ray()

    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        if output_margin:
            err = sum(1 for i in range(len(preds))
                      if preds[i].argmax() != labels[i]) / float(len(preds))
        else:
            err = sum(1 for i in range(len(preds))
                      if preds[i] != labels[i]) / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    y = iris["target"]
    X = iris["data"]
    kf = KFold(n_splits=2, shuffle=True, random_state=self.rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = RayXGBClassifier().fit(X[train_index], y[train_index])
        if hasattr(xgb_model.get_booster(), "num_boosted_rounds"):
            assert (xgb_model.get_booster().num_boosted_rounds() ==
                    xgb_model.n_estimators)
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBClassifier().fit
        preds2 = xgb_model.predict(
            X[test_index], output_margin=True, ntree_limit=3)
        preds3 = xgb_model.predict(
            X[test_index], output_margin=True, ntree_limit=0)
        preds4 = xgb_model.predict(
            X[test_index], output_margin=False, ntree_limit=3)
        labels = y[test_index]

        check_pred(preds, labels, output_margin=False)
        check_pred(preds2, labels, output_margin=True)
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)

    cls = RayXGBClassifier(n_estimators=4).fit(X, y)
    assert cls.n_classes_ == 3
    proba = cls.predict_proba(X)
    assert proba.shape[0] == X.shape[0]
    assert proba.shape[1] == cls.n_classes_

    # custom objective, the default is multi:softprob
    # so no transformation is required.
    cls = RayXGBClassifier(
        n_estimators=4, objective=softprob_obj(3)).fit(X, y)
    proba = cls.predict_proba(X)
    assert proba.shape[0] == X.shape[0]
    assert proba.shape[1] == cls.n_classes_
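# `softprob_obj` above is not defined in this section. A minimal sketch of
# such a custom softmax objective factory is shown below; the actual helper
# is assumed to live elsewhere in this test module (it mirrors the one in
# xgboost's own scikit-learn tests), so the return shapes and names here
# are illustrative, not authoritative.
def softprob_obj(classes):
    def objective(labels, predt):
        # Gradient and hessian of softmax cross-entropy, one entry
        # per (row, class) pair.
        rows = labels.shape[0]
        grad = np.zeros((rows, classes), dtype=float)
        hess = np.zeros((rows, classes), dtype=float)
        eps = 1e-6
        for r in range(rows):
            target = int(labels[r])
            # Numerically stable softmax over the raw margins of row r.
            e = np.exp(predt[r] - np.max(predt[r]))
            p = e / np.sum(e)
            for c in range(classes):
                grad[r, c] = p[c] - 1.0 if c == target else p[c]
                hess[r, c] = max(2.0 * p[c] * (1.0 - p[c]), eps)
        # Flattened row-major (assumed layout for multi-class objectives).
        return grad.reshape(rows * classes), hess.reshape(rows * classes)

    return objective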
def test_XGBClassifier_resume(self):
    self._init_ray()

    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import log_loss

    with TemporaryDirectory() as tempdir:
        model1_path = os.path.join(tempdir, "test_XGBClassifier.model")
        model1_booster_path = os.path.join(tempdir,
                                           "test_XGBClassifier.booster")

        X, Y = load_breast_cancer(return_X_y=True)

        model1 = RayXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=8)
        model1.fit(X, Y)

        pred1 = model1.predict(X)
        # sklearn's log_loss signature is log_loss(y_true, y_pred)
        log_loss1 = log_loss(Y, pred1)

        # file name of stored xgb model
        model1.save_model(model1_path)
        model2 = RayXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=8)
        model2.fit(X, Y, xgb_model=model1_path)

        pred2 = model2.predict(X)
        log_loss2 = log_loss(Y, pred2)

        # Resumed training should change predictions and improve the loss.
        assert np.any(pred1 != pred2)
        assert log_loss1 > log_loss2

        # file name of a stored 'Booster' instance xgb model
        model1.get_booster().save_model(model1_booster_path)
        model2 = RayXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=8)
        model2.fit(X, Y, xgb_model=model1_booster_path)

        pred2 = model2.predict(X)
        log_loss2 = log_loss(Y, pred2)

        assert np.any(pred1 != pred2)
        assert log_loss1 > log_loss2
def save_load_model(self, model_path):
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(n_class=2)
    y = digits["target"]
    X = digits["data"]
    kf = KFold(n_splits=2, shuffle=True, random_state=self.rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = RayXGBClassifier(use_label_encoder=False).fit(
            X[train_index], y[train_index])
        xgb_model.save_model(model_path)

        xgb_model = RayXGBClassifier()
        xgb_model.load_model(model_path)

        assert xgb_model.use_label_encoder is False
        assert isinstance(xgb_model.classes_, np.ndarray)
        assert isinstance(xgb_model._Booster, xgb.Booster)

        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        assert err < 0.1
        assert xgb_model.get_booster().attr("scikit_learn") is None

        # test native booster
        preds = xgb_model.predict(X[test_index], output_margin=True)
        booster = xgb.Booster(model_file=model_path)
        predt_1 = booster.predict(
            xgb.DMatrix(X[test_index]), output_margin=True)
        assert np.allclose(preds, predt_1)

        with self.assertRaises(TypeError):
            xgb_model = xgb.XGBModel()
            xgb_model.load_model(model_path)
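# A minimal sketch of how the save_load_model helper above might be driven
# (assumption: the test name and model file name below are illustrative;
# the actual driver is expected elsewhere in the suite):
def test_save_load_model(self):
    self._init_ray()
    with TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model")
        self.save_load_model(model_path)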