コード例 #1
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_multiclass_classification(self):
        """Fit RayXGBClassifier on iris and check multiclass predictions.

        Runs a shuffled 2-fold split, requiring an error rate below 0.4 for
        the default prediction and for several ``output_margin`` /
        ``ntree_limit`` combinations, then checks the ``predict_proba``
        shape for the default objective and for a custom ``softprob_obj``.
        """
        self._init_ray()

        from sklearn.datasets import load_iris
        from sklearn.model_selection import KFold

        def check_pred(preds, labels, output_margin):
            # Margin predictions are compared through their argmax;
            # otherwise each prediction is compared to its label directly.
            if output_margin:
                wrong = sum(1 for idx, pred in enumerate(preds)
                            if pred.argmax() != labels[idx])
            else:
                wrong = sum(1 for idx, pred in enumerate(preds)
                            if pred != labels[idx])
            err = wrong / float(len(preds))
            assert err < 0.4

        def check_proba_shape(model):
            # Expect one row per sample and one column per class.
            proba = model.predict_proba(X)
            assert proba.shape[0] == X.shape[0]
            assert proba.shape[1] == model.n_classes_

        iris = load_iris()
        X, y = iris["data"], iris["target"]

        # Keyword sets passed to predict(); the empty dict is the default
        # call, the others exercise output_margin and ntree_limit.
        predict_kwargs = (
            {},
            {"output_margin": True, "ntree_limit": 3},
            {"output_margin": True, "ntree_limit": 0},
            {"output_margin": False, "ntree_limit": 3},
        )

        splitter = KFold(n_splits=2, shuffle=True, random_state=self.rng)
        for train_idx, test_idx in splitter.split(X, y):
            xgb_model = RayXGBClassifier().fit(X[train_idx], y[train_idx])

            # num_boosted_rounds is only checked when the booster has it.
            booster = xgb_model.get_booster()
            if hasattr(booster, "num_boosted_rounds"):
                assert booster.num_boosted_rounds() == xgb_model.n_estimators

            # Run every prediction first, then validate them in order.
            X_test = X[test_idx]
            all_preds = [
                xgb_model.predict(X_test, **kwargs)
                for kwargs in predict_kwargs
            ]
            labels = y[test_idx]
            for kwargs, preds in zip(predict_kwargs, all_preds):
                check_pred(
                    preds,
                    labels,
                    output_margin=kwargs.get("output_margin", False))

        cls = RayXGBClassifier(n_estimators=4).fit(X, y)
        assert cls.n_classes_ == 3
        check_proba_shape(cls)

        # Custom objective: the default is multi:softprob, so no
        # transformation of the output is required.
        cls = RayXGBClassifier(
            n_estimators=4, objective=softprob_obj(3)).fit(X, y)
        check_proba_shape(cls)
コード例 #2
0
    def testClassifierLabelEncoder(self, n_class=2):
        """Check RayXGBClassifier argument validation with RayDMatrix input.

        The test expects errors mentioning ``use_label_encoder``,
        ``num_class`` and the required ``eval_set`` element types for the
        invalid combinations below, and expects fit/predict to succeed once
        ``num_class`` is supplied together with RayDMatrix data.

        Args:
            n_class: Number of digit classes to load and to pass as
                ``num_class``.
        """
        self._init_ray()

        from sklearn.datasets import load_digits

        digits = load_digits(n_class=n_class)
        X, y = digits["data"], digits["target"]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.5)

        train_matrix = RayDMatrix(X_train, y_train)
        test_matrix = RayDMatrix(X_test, y_test)

        def make_classifier(use_label_encoder, **extra):
            # Fresh classifier built from the shared test parameters.
            return RayXGBClassifier(
                use_label_encoder=use_label_encoder, **extra, **self.params)

        array_eval_set = [(X_test, y_test)]
        matrix_eval_set = [(test_matrix, "eval")]

        # RayDMatrix input together with use_label_encoder=True.
        with self.assertRaisesRegex(Exception, "use_label_encoder"):
            make_classifier(True).fit(train_matrix, None)

        # RayDMatrix input without num_class.
        with self.assertRaisesRegex(Exception, "num_class"):
            make_classifier(False).fit(train_matrix, None)

        # RayDMatrix training data with an array-based eval_set.
        with self.assertRaisesRegex(Exception, r"must be \(RayDMatrix, str\)"):
            make_classifier(False).fit(
                train_matrix, None, eval_set=array_eval_set)

        # Array training data with a RayDMatrix-based eval_set.
        with self.assertRaisesRegex(Exception,
                                    r"must be \(array_like, array_like\)"):
            make_classifier(False).fit(
                X_train, y_train, eval_set=matrix_eval_set)

        # Valid configurations: num_class given, with and without eval_set.
        make_classifier(False, num_class=n_class).fit(train_matrix, None)

        clf = make_classifier(False, num_class=n_class).fit(
            train_matrix, None, eval_set=matrix_eval_set)

        clf.predict(test_matrix)
        clf.predict_proba(test_matrix)
コード例 #3
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_save_load_model(self):
        """Round-trip models through disk and compare predictions.

        First runs ``self.save_load_model`` for two file names, then trains
        a booster with ``xgb.train``, saves it as JSON and checks that both
        ``RayXGBClassifier`` and ``xgb.XGBModel`` reproduce the booster's
        predictions after loading the file.
        """
        self._init_ray()

        # Same round-trip helper, once per file name.
        for file_name in ("digits.model", "digits.model.json"):
            with TemporaryDirectory() as tempdir:
                self.save_load_model(os.path.join(tempdir, file_name))

        from sklearn.datasets import load_digits

        with TemporaryDirectory() as tempdir:
            model_path = os.path.join(tempdir, "digits.model.json")

            digits = load_digits(n_class=2)
            X, y = digits["data"], digits["target"]

            train_params = {
                "tree_method": "hist",
                "objective": "binary:logistic",
            }
            booster = xgb.train(
                train_params,
                dtrain=xgb.DMatrix(X, y),
                num_boost_round=4,
            )
            # Reference predictions from the natively trained booster.
            reference = booster.predict(xgb.DMatrix(X))
            booster.save_model(model_path)

            ray_classifier = RayXGBClassifier()
            ray_classifier.load_model(model_path)

            proba = ray_classifier.predict_proba(X)
            assert proba.shape[0] == X.shape[0]
            assert proba.shape[1] == 2  # binary

            # Column 1 of predict_proba is compared with the booster output.
            positive_proba = ray_classifier.predict_proba(X)[:, 1]
            assert np.allclose(reference, positive_proba)

            plain_model = xgb.XGBModel()
            plain_model.load_model(model_path)
            loaded_preds = plain_model.predict(X)
            assert np.allclose(reference, loaded_preds)