Ejemplo n.º 1
0
    def test_parameters_access(self):
        """Constructor kwargs must be visible via get_params(), and nthread
        changes (set_params after fit) must propagate into the booster's
        saved config, surviving a subsequent predict()."""
        self._init_ray()

        from sklearn import datasets

        base_params = {
            "updater": "grow_gpu_hist",
            "subsample": 0.5,
            "n_jobs": -1,
        }
        clf = RayXGBClassifier(n_estimators=1000, **base_params)
        retrieved = clf.get_params()
        assert retrieved["updater"] == "grow_gpu_hist"
        assert retrieved["subsample"] == 0.5
        assert retrieved["n_estimators"] == 1000

        clf = RayXGBClassifier(n_estimators=1, nthread=4)
        X, y = datasets.load_iris(return_X_y=True)
        clf.fit(X, y)

        def booster_nthread():
            # The booster exposes its runtime configuration as a JSON blob.
            cfg = json.loads(clf.get_booster().save_config())
            return int(cfg["learner"]["generic_param"]["nthread"])

        assert booster_nthread() == 4

        # set_params() after fit must update the underlying booster config.
        clf.set_params(nthread=16)
        assert booster_nthread() == 16

        # Running prediction must not clobber the configured thread count.
        clf.predict(X)
        assert booster_nthread() == 16
Ejemplo n.º 2
0
    def test_multiclass_classification(self):
        """Multiclass fit/predict on iris across a 2-fold split, exercising
        the output_margin/ntree_limit predict kwargs, predict_proba shapes,
        and a custom softprob objective."""
        self._init_ray()

        from sklearn.datasets import load_iris
        from sklearn.model_selection import KFold

        def check_pred(preds, labels, output_margin):
            # Margin output is a per-class score vector; argmax recovers
            # the predicted label. Otherwise preds are labels already.
            if output_margin:
                wrong = sum(
                    p.argmax() != t for p, t in zip(preds, labels))
            else:
                wrong = sum(p != t for p, t in zip(preds, labels))
            assert wrong / float(len(preds)) < 0.4

        iris = load_iris()
        X, y = iris["data"], iris["target"]
        splitter = KFold(n_splits=2, shuffle=True, random_state=self.rng)
        for train_idx, test_idx in splitter.split(X, y):
            model = RayXGBClassifier().fit(X[train_idx], y[train_idx])
            booster = model.get_booster()
            if hasattr(booster, "num_boosted_rounds"):
                assert booster.num_boosted_rounds() == model.n_estimators
            labels = y[test_idx]
            preds = model.predict(X[test_idx])
            # Exercise the extra keyword arguments of predict().
            margin_limited = model.predict(
                X[test_idx], output_margin=True, ntree_limit=3)
            margin_full = model.predict(
                X[test_idx], output_margin=True, ntree_limit=0)
            label_limited = model.predict(
                X[test_idx], output_margin=False, ntree_limit=3)

            check_pred(preds, labels, output_margin=False)
            check_pred(margin_limited, labels, output_margin=True)
            check_pred(margin_full, labels, output_margin=True)
            check_pred(label_limited, labels, output_margin=False)

        cls = RayXGBClassifier(n_estimators=4).fit(X, y)
        assert cls.n_classes_ == 3
        proba = cls.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == cls.n_classes_

        # Custom objective; the default is multi:softprob so the proba
        # output needs no extra transformation.
        cls = RayXGBClassifier(
            n_estimators=4, objective=softprob_obj(3)).fit(X, y)
        proba = cls.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == cls.n_classes_
Ejemplo n.º 3
0
    def test_XGBClassifier_resume(self):
        """Resuming training from a saved model — either the sklearn
        wrapper's save format or a raw ``Booster`` file — must change
        predictions and reduce the training log-loss.

        Fix: ``sklearn.metrics.log_loss`` takes ``(y_true, y_pred)``; the
        original called it with the arguments swapped at every call site.
        """
        self._init_ray()

        from sklearn.datasets import load_breast_cancer
        from sklearn.metrics import log_loss

        with TemporaryDirectory() as tempdir:
            model1_path = os.path.join(tempdir, "test_XGBClassifier.model")
            model1_booster_path = os.path.join(tempdir,
                                               "test_XGBClassifier.booster")

            X, Y = load_breast_cancer(return_X_y=True)

            model1 = RayXGBClassifier(
                learning_rate=0.3, random_state=0, n_estimators=8)
            model1.fit(X, Y)

            pred1 = model1.predict(X)
            log_loss1 = log_loss(Y, pred1)

            def check_resumed(saved_model_file):
                # Continue training from the stored model and verify the
                # extra boosting rounds improved the training fit.
                model2 = RayXGBClassifier(
                    learning_rate=0.3, random_state=0, n_estimators=8)
                model2.fit(X, Y, xgb_model=saved_model_file)
                pred2 = model2.predict(X)
                log_loss2 = log_loss(Y, pred2)
                assert np.any(pred1 != pred2)
                assert log_loss1 > log_loss2

            # File produced by the sklearn wrapper's save_model().
            model1.save_model(model1_path)
            check_resumed(model1_path)

            # File produced by the underlying Booster's save_model().
            model1.get_booster().save_model(model1_booster_path)
            check_resumed(model1_booster_path)
Ejemplo n.º 4
0
    def save_load_model(self, model_path):
        """Round-trip a fitted classifier through ``model_path`` and check
        the reloaded model's attributes, its accuracy on held-out folds,
        parity with the native booster, and the TypeError on loading into
        a plain ``xgb.XGBModel``."""
        from sklearn.datasets import load_digits
        from sklearn.model_selection import KFold

        digits = load_digits(n_class=2)
        X, y = digits["data"], digits["target"]
        folds = KFold(n_splits=2, shuffle=True, random_state=self.rng)
        for train_idx, test_idx in folds.split(X, y):
            fitted = RayXGBClassifier(use_label_encoder=False).fit(
                X[train_idx], y[train_idx])
            fitted.save_model(model_path)

            xgb_model = RayXGBClassifier()
            xgb_model.load_model(model_path)

            assert xgb_model.use_label_encoder is False
            assert isinstance(xgb_model.classes_, np.ndarray)
            assert isinstance(xgb_model._Booster, xgb.Booster)

            labels = y[test_idx]
            preds = xgb_model.predict(X[test_idx])
            n_wrong = sum(
                int(p > 0.5) != t for p, t in zip(preds, labels))
            assert n_wrong / float(len(preds)) < 0.1
            # save_model() must not embed the scikit-learn attribute blob.
            assert xgb_model.get_booster().attr("scikit_learn") is None

            # A raw booster loaded from the same file must produce the
            # same margins as the sklearn wrapper.
            margins = xgb_model.predict(X[test_idx], output_margin=True)
            native = xgb.Booster(model_file=model_path)
            native_margins = native.predict(
                xgb.DMatrix(X[test_idx]), output_margin=True)
            assert np.allclose(margins, native_margins)

            # A plain XGBModel must refuse to load a classifier's file.
            with self.assertRaises(TypeError):
                xgb_model = xgb.XGBModel()
                xgb_model.load_model(model_path)