コード例 #1
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_num_parallel_tree(self):
        """Check tree counts when ``num_parallel_tree`` is involved.

        A ``RayXGBRegressor`` built with ``n_estimators=4`` and
        ``num_parallel_tree=4`` must dump 16 trees. Unless the xgboost
        version equals 0.90, a ``RayXGBRFRegressor`` with ``n_estimators=4``
        must dump 4 trees and its saved booster config must report
        ``num_parallel_tree == 4``.
        """
        self._init_ray()

        from sklearn.datasets import load_boston

        regressor = RayXGBRegressor(
            n_estimators=4, num_parallel_tree=4, tree_method="hist")
        dataset = load_boston()
        features, target = dataset["data"], dataset["target"]

        fitted = regressor.fit(X=features, y=target)
        trees = fitted.get_booster().get_dump(dump_format="json")
        assert len(trees) == 16

        if XGBOOST_LOOSE_VERSION != LooseVersion("0.90"):
            forest = RayXGBRFRegressor(n_estimators=4)
            fitted = forest.fit(X=features, y=target)
            trees = fitted.get_booster().get_dump(dump_format="json")
            assert len(trees) == 4

            # The config section that holds num_parallel_tree depends on
            # the xgboost version.
            if XGBOOST_LOOSE_VERSION >= LooseVersion("1.6.0"):
                section = "gbtree_model_param"
            else:
                section = "gbtree_train_param"

            config = json.loads(fitted.get_booster().save_config())
            booster_cfg = config["learner"]["gradient_booster"]
            assert int(booster_cfg[section]["num_parallel_tree"]) == 4
コード例 #2
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_constraint_parameters(self):
        """Check that ``interaction_constraints`` ends up in the booster config.

        The constraint string passed to ``RayXGBRegressor`` must be found
        unchanged in the saved config, under the ``grow_histmaker`` updater
        for xgboost >= 1.6.0 and under the ``prune`` updater otherwise.
        """
        self._init_ray()

        constraints = "[[0, 1], [2, 3, 4]]"
        regressor = RayXGBRegressor(interaction_constraints=constraints)
        features = np.random.randn(10, 10)
        target = np.random.randn(10)
        regressor.fit(features, target)

        config = json.loads(regressor.get_booster().save_config())
        updater_name = ("grow_histmaker"
                        if XGBOOST_LOOSE_VERSION >= LooseVersion("1.6.0") else
                        "prune")
        updater_cfg = config["learner"]["gradient_booster"]["updater"][
            updater_name]
        assert updater_cfg["train_param"]["interaction_constraints"] == \
            constraints
コード例 #3
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_regression_with_custom_objective(self):
        """Train ``RayXGBRegressor`` with a user-supplied objective.

        First fits with a least-squares objective on each of two KFold
        splits and requires a held-out MSE below 25 on every split. Then
        checks that a custom objective which raises makes ``fit`` fail,
        i.e. that the supplied callable is really invoked.
        """
        self._init_ray()

        from sklearn.metrics import mean_squared_error
        from sklearn.datasets import load_boston
        from sklearn.model_selection import KFold

        def objective_ls(y_true, y_pred):
            # Gradient and (constant) hessian handed back to the booster.
            grad = y_pred - y_true
            hess = np.ones(len(y_true))
            return grad, hess

        boston = load_boston()
        y = boston["target"]
        X = boston["data"]
        kf = KFold(n_splits=2, shuffle=True, random_state=self.rng)
        for train_index, test_index in kf.split(X, y):
            xgb_model = RayXGBRegressor(objective=objective_ls).fit(
                X[train_index], y[train_index])
            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]
            # Assert inside the loop so that every fold is verified; placed
            # after the loop, only the last fold's predictions were checked.
            assert mean_squared_error(preds, labels) < 25

        # Test that the custom objective function is actually used
        class XGBCustomObjectiveException(Exception):
            pass

        def dummy_objective(y_true, y_pred):
            raise XGBCustomObjectiveException()

        xgb_model = RayXGBRegressor(objective=dummy_objective)
        # TODO figure out how to assertRaises XGBCustomObjectiveException
        # NOTE(review): the failure surfaces as RuntimeError here rather
        # than the custom exception type - presumably it is wrapped on the
        # way out of training; confirm.
        with self.assertRaises(RuntimeError):
            xgb_model.fit(X, y)
コード例 #4
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_boston_housing_regression(self):
        """Fit ``RayXGBRegressor`` on two KFold splits and bound the MSE.

        For every split, predictions are made with the default arguments
        and with several ``output_margin`` / ``ntree_limit`` combinations;
        each prediction must stay below its own MSE threshold.
        """
        self._init_ray()

        from sklearn.metrics import mean_squared_error
        from sklearn.datasets import load_boston
        from sklearn.model_selection import KFold

        # (extra predict kwargs, upper bound on the held-out MSE)
        predict_cases = (
            ({}, 25),
            ({"output_margin": True, "ntree_limit": 3}, 350),
            ({"output_margin": True, "ntree_limit": 0}, 25),
            ({"output_margin": False, "ntree_limit": 3}, 350),
        )

        dataset = load_boston()
        target = dataset["target"]
        features = dataset["data"]
        folds = KFold(n_splits=2, shuffle=True, random_state=self.rng)
        for train_idx, test_idx in folds.split(features, target):
            model = RayXGBRegressor().fit(features[train_idx],
                                          target[train_idx])

            # Run every prediction first, then check them in order.
            held_out = features[test_idx]
            predictions = [
                model.predict(held_out, **kwargs)
                for kwargs, _ in predict_cases
            ]
            expected = target[test_idx]

            for predicted, (_, bound) in zip(predictions, predict_cases):
                assert mean_squared_error(predicted, expected) < bound
コード例 #5
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_estimator_type(self):
        """Check ``_estimator_type`` tags and type checking on model load.

        Each Ray estimator class must expose the expected
        ``_estimator_type``. A saved classifier model must be rejected with
        ``TypeError`` when loaded into a regressor and load without error
        into a fresh classifier.
        """
        self._init_ray()

        expected_types = (
            (RayXGBClassifier, "classifier"),
            (RayXGBRFClassifier, "classifier"),
            (RayXGBRegressor, "regressor"),
            (RayXGBRFRegressor, "regressor"),
            (RayXGBRanker, "ranker"),
        )
        for estimator_cls, type_name in expected_types:
            assert estimator_cls._estimator_type == type_name

        from sklearn.datasets import load_digits

        features, labels = load_digits(n_class=2, return_X_y=True)
        trained = RayXGBClassifier(n_estimators=2).fit(features, labels)
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, "cls.json")
            trained.save_model(model_path)

            wrong_kind = RayXGBRegressor()
            with self.assertRaises(TypeError):
                wrong_kind.load_model(model_path)

            # Loading into a matching estimator must not raise.
            restored = RayXGBClassifier()
            restored.load_model(model_path)
コード例 #6
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_parameter_tuning(self):
        """Run ``GridSearchCV`` over ``RayXGBRegressor`` hyper-parameters.

        Searches ``max_depth`` and ``n_estimators`` with 3-fold CV and
        asserts on the resulting best score and best parameter set.
        """
        self._init_ray()

        from sklearn.model_selection import GridSearchCV
        from sklearn.datasets import load_boston

        dataset = load_boston()
        target = dataset["target"]
        features = dataset["data"]

        base_model = RayXGBRegressor(learning_rate=0.1)
        param_grid = {
            "max_depth": [2, 4, 6],
            "n_estimators": [50, 100, 200],
        }
        search = GridSearchCV(base_model, param_grid, cv=3, verbose=1)
        search.fit(features, target)

        assert search.best_score_ < 0.7
        assert search.best_params_ == {"n_estimators": 100, "max_depth": 4}
コード例 #7
0
ファイル: test_sklearn.py プロジェクト: krfricke/xgboost_ray
    def test_stacking_regression(self):
        """Use ``RayXGBRegressor`` as a base learner in a ``StackingRegressor``.

        The stack is fitted on a train split of the diabetes data and
        scored on the test split; the score itself is not asserted on, so
        the test only requires that fit and score complete.
        """
        self._init_ray()

        from sklearn.model_selection import train_test_split
        from sklearn.datasets import load_diabetes
        from sklearn.linear_model import RidgeCV
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.ensemble import StackingRegressor

        features, target = load_diabetes(return_X_y=True)

        base_learners = [
            ("gbm", RayXGBRegressor(objective="reg:squarederror")),
            ("lr", RidgeCV()),
        ]
        meta_learner = RandomForestRegressor(n_estimators=10, random_state=42)
        stack = StackingRegressor(
            estimators=base_learners, final_estimator=meta_learner)

        train_X, test_X, train_y, test_y = train_test_split(
            features, target, random_state=42)
        fitted_stack = stack.fit(train_X, train_y)
        fitted_stack.score(test_X, test_y)
コード例 #8
0
    def testRegressor(self):
        """Check which input / ``eval_set`` combinations ``fit`` accepts.

        Mixing a ``RayDMatrix`` training input with an array ``eval_set``
        (or the reverse) must raise with the matching message. Fitting
        with ``RayDMatrix`` inputs throughout must succeed, and the fitted
        model must be able to predict on a ``RayDMatrix``.
        """
        self._init_ray()

        from sklearn.datasets import load_boston

        dataset = load_boston()
        target = dataset["target"]
        features = dataset["data"]

        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.5)

        dtrain = RayDMatrix(X_train, y_train)
        dtest = RayDMatrix(X_test, y_test)

        # RayDMatrix training data combined with an array-style eval_set.
        with self.assertRaisesRegex(Exception, r"must be \(RayDMatrix, str\)"):
            array_eval = [(X_test, y_test)]
            RayXGBRegressor(**self.params).fit(
                dtrain, None, eval_set=array_eval)

        # Array training data combined with a RayDMatrix eval_set.
        with self.assertRaisesRegex(Exception,
                                    r"must be \(array_like, array_like\)"):
            matrix_eval = [(dtest, "eval")]
            RayXGBRegressor(**self.params).fit(
                X_train, y_train, eval_set=matrix_eval)

        RayXGBRegressor(**self.params).fit(dtrain, None)

        model = RayXGBRegressor(**self.params).fit(
            dtrain, None, eval_set=[(dtest, "eval")])

        model.predict(dtest)