Example #1
0
    def test_ranking(self):
        """RayXGBRanker must match the functional train()/predict() API."""
        # Synthetic ranking data: 20 train / 4 validation query groups,
        # each group holding 50 consecutive rows.
        train_x = np.random.rand(1000, 10)
        train_y = np.random.randint(5, size=1000)
        train_qid = np.repeat(np.array([list(range(20))]), 50)

        valid_x = np.random.rand(200, 10)
        valid_y = np.random.randint(5, size=200)
        valid_qid = np.repeat(np.array([list(range(4))]), 50)

        test_x = np.random.rand(100, 10)

        # Fit through the scikit-learn style wrapper.
        sklearn_params = dict(
            objective="rank:pairwise",
            learning_rate=0.1,
            gamma=1.0,
            min_child_weight=0.1,
            max_depth=6,
            n_estimators=4,
            random_state=1,
            n_jobs=2)
        ranker = RayXGBRanker(**sklearn_params)
        ranker.fit(
            train_x,
            train_y,
            qid=train_qid,
            eval_set=[(valid_x, valid_y)],
            eval_qid=[valid_qid])
        assert ranker.evals_result()

        sklearn_pred = ranker.predict(test_x)

        # Train the equivalent model through the functional API.
        dtrain = RayDMatrix(train_x, train_y, qid=train_qid)
        dvalid = RayDMatrix(valid_x, valid_y, qid=valid_qid)
        dtest = RayDMatrix(test_x)

        core_params = dict(
            objective="rank:pairwise",
            eta=0.1,
            gamma=1.0,
            min_child_weight=0.1,
            max_depth=6,
            random_state=1)
        booster = train(
            core_params,
            dtrain,
            num_boost_round=4,
            evals=[(dvalid, "validation")],
            ray_params=RayParams(num_actors=2, max_actor_restarts=0))
        core_pred = predict(
            booster,
            dtest,
            ray_params=RayParams(num_actors=2, max_actor_restarts=0))

        # Both code paths must produce (nearly) identical predictions.
        np.testing.assert_almost_equal(sklearn_pred, core_pred)
Example #2
0
    def test_best_ntree_limit(self):
        """best_ntree_limit is rounds * num_parallel_tree, or 0 for gblinear."""
        self._init_ray()

        from sklearn.datasets import load_iris

        X, y = load_iris(return_X_y=True)

        # Local helper renamed so it does not shadow the module-level train().
        def fit_and_check(booster, forest):
            rounds = 4
            cls = RayXGBClassifier(
                n_estimators=rounds, num_parallel_tree=forest,
                booster=booster).fit(
                    X, y, eval_set=[(X, y)], early_stopping_rounds=3)

            if forest:
                assert cls.best_ntree_limit == rounds * forest
            else:
                assert cls.best_ntree_limit == 0

            # predict() applies best_ntree_limit by default; with gblinear
            # the limit is 0 and therefore automatically ignored.
            cls.predict(X)

        num_parallel_tree = 4
        fit_and_check("gbtree", num_parallel_tree)
        fit_and_check("dart", num_parallel_tree)
        fit_and_check("gblinear", None)
Example #3
0
    def fit(
        self,
        X,
        y,
        *,
        group=None,
        qid=None,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_group=None,
        eval_qid=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=False,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        """Fit a ranking model on Ray-distributed data.

        Mirrors ``xgboost.XGBRanker.fit`` but routes training through
        xgboost-ray's ``train()``. Query groups must be supplied via
        ``qid`` (and ``eval_qid`` for each evaluation set); ``group`` and
        ``eval_group`` are rejected.

        Returns:
            self, with ``_Booster``, ``objective``, ``additional_results_``
            and the evaluation history populated.

        Raises:
            ValueError: if ``group``/``eval_group`` is given, ``qid`` is
                missing, ``eval_set`` is given without ``eval_qid``, or a
                callable (custom) evaluation metric is supplied.
        """
        if not (group is None and eval_group is None):
            raise ValueError("Use `qid` instead of `group` for RayXGBRanker.")
        if qid is None:
            raise ValueError("`qid` is required for ranking.")

        if eval_set is not None:
            if eval_qid is None:
                raise ValueError("`eval_qid` is required if"
                                 " `eval_set` is not None")

        evals_result = {}
        ray_dmatrix_params = ray_dmatrix_params or {}

        params = self.get_xgb_params()

        # X may already be a RayDMatrix; then eval_set entries are expected
        # to be RayDMatrix objects too and no wrapping is necessary.
        train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
            X, sample_weight, base_margin, eval_set, sample_weight_eval_set,
            base_margin_eval_set, eval_qid)

        if train_dmatrix is None:
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
                y=y,
                group=group,
                qid=qid,
                sample_weight=sample_weight,
                base_margin=base_margin,
                feature_weights=feature_weights,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                base_margin_eval_set=base_margin_eval_set,
                eval_group=eval_group,
                eval_qid=eval_qid,
                # changed in xgboost-ray:
                create_dmatrix=lambda **kwargs: RayDMatrix(**{
                    **kwargs,
                    **ray_dmatrix_params
                }),
                **self._ray_get_wrap_evaluation_matrices_compat_kwargs())

        try:
            model, feval, params = self._configure_fit(xgb_model, eval_metric,
                                                       params)
        except TypeError:
            # XGBoost >= 1.6.0 also returns early_stopping_rounds/callbacks.
            (model, feval, params, early_stopping_rounds,
             callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                              early_stopping_rounds, callbacks)
        if callable(feval):
            raise ValueError(
                "Custom evaluation metric is not yet supported for XGBRanker.")

        # remove those as they will be set in RayXGBoostActor
        params.pop("n_jobs", None)
        params.pop("nthread", None)

        ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

        additional_results = {}

        self._Booster = train(
            params,
            train_dmatrix,
            self.n_estimators,
            early_stopping_rounds=early_stopping_rounds,
            evals=evals,
            evals_result=evals_result,
            feval=feval,
            verbose_eval=verbose,
            xgb_model=model,
            callbacks=callbacks,
            # changed in xgboost-ray:
            additional_results=additional_results,
            ray_params=ray_params,
            _remote=_remote,
        )

        self.objective = params["objective"]

        self.additional_results_ = additional_results

        self._set_evaluation_result(evals_result)
        return self
Example #4
0
    def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        """Fit a classifier on Ray-distributed data.

        Mirrors ``xgboost.XGBClassifier.fit`` but routes training through
        xgboost-ray's ``train()``. When ``X`` is already a RayDMatrix, the
        labels must be pre-encoded and ``num_class`` must have been passed
        at initialization; otherwise the labels are preprocessed via
        ``self._ray_fit_preprocess``.

        Returns:
            self, with ``_Booster``, ``objective``, ``additional_results_``
            and the evaluation history populated.

        Raises:
            ValueError: if ``X`` is a RayDMatrix while ``use_label_encoder``
                is True or ``num_class`` is missing, or if a plain ``X`` is
                not a 2-dimensional matrix.
        """
        evals_result = {}
        ray_dmatrix_params = ray_dmatrix_params or {}

        params = self.get_xgb_params()

        train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
            X, sample_weight, base_margin, eval_set, sample_weight_eval_set,
            base_margin_eval_set)

        if train_dmatrix is not None:
            # RayDMatrix path: no label encoding is performed, so the class
            # metadata must come from the `num_class` parameter.
            if not hasattr(self, "use_label_encoder"):
                warnings.warn("If X is a RayDMatrix, no label encoding"
                              " will be performed. Ensure the labels are"
                              " encoded.")
            elif self.use_label_encoder:
                raise ValueError(
                    "X cannot be a RayDMatrix if `use_label_encoder` "
                    "is set to True")
            if "num_class" not in params:
                raise ValueError(
                    "`num_class` must be set during initialization if X"
                    " is a RayDMatrix")
            self.classes_ = list(range(0, params["num_class"]))
            self.n_classes_ = params["num_class"]
            if self.n_classes_ <= 2:
                # Binary objectives do not accept `num_class`.
                params.pop("num_class")
            label_transform = lambda x: x  # noqa: E731
        else:
            if len(X.shape) != 2:
                # Simply raise an error here since there might be many
                # different ways of reshaping
                raise ValueError(
                    "Please reshape the input data X into 2-dimensional "
                    "matrix.")

            label_transform = self._ray_fit_preprocess(y)

        if callable(self.objective):
            obj = _objective_decorator(self.objective)
            # Use default value. Is it really not used ?
            params["objective"] = "binary:logistic"
        else:
            obj = None

        if self.n_classes_ > 2:
            # Switch to using a multiclass objective in the underlying
            # XGB instance
            params["objective"] = "multi:softprob"
            params["num_class"] = self.n_classes_

        try:
            model, feval, params = self._configure_fit(xgb_model, eval_metric,
                                                       params)
        except TypeError:
            # XGBoost >= 1.6.0 also returns early_stopping_rounds/callbacks.
            (model, feval, params, early_stopping_rounds,
             callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                              early_stopping_rounds, callbacks)

        if train_dmatrix is None:
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
                y=y,
                group=None,
                qid=None,
                sample_weight=sample_weight,
                base_margin=base_margin,
                feature_weights=feature_weights,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                base_margin_eval_set=base_margin_eval_set,
                eval_group=None,
                eval_qid=None,
                # changed in xgboost-ray:
                create_dmatrix=lambda **kwargs: RayDMatrix(**{
                    **kwargs,
                    **ray_dmatrix_params
                }),
                **self._ray_get_wrap_evaluation_matrices_compat_kwargs(
                    label_transform=label_transform))

        # remove those as they will be set in RayXGBoostActor
        params.pop("n_jobs", None)
        params.pop("nthread", None)

        ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

        additional_results = {}

        self._Booster = train(
            params,
            train_dmatrix,
            self.get_num_boosting_rounds(),
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=evals_result,
            obj=obj,
            feval=feval,
            verbose_eval=verbose,
            xgb_model=model,
            callbacks=callbacks,
            # changed in xgboost-ray:
            additional_results=additional_results,
            ray_params=ray_params,
            _remote=_remote,
        )

        # Keep a callable objective as-is; only record string objectives.
        if not callable(self.objective):
            self.objective = params["objective"]

        self.additional_results_ = additional_results

        self._set_evaluation_result(evals_result)
        return self
Example #5
0
    def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model: Optional[Union[Booster, str, "XGBModel"]] = None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        """Fit the model on Ray-distributed data.

        Mirrors ``xgboost.XGBModel.fit`` but routes training through
        xgboost-ray's ``train()``. Plain array inputs are wrapped into
        RayDMatrix objects; inputs that are already RayDMatrix instances
        are used as-is.

        Returns:
            self, with ``_Booster``, ``additional_results_`` and the
            evaluation history populated.
        """
        eval_history = {}
        ray_dmatrix_params = ray_dmatrix_params or {}

        train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
            X, sample_weight, base_margin, eval_set, sample_weight_eval_set,
            base_margin_eval_set)

        if train_dmatrix is None:
            # Inputs were plain arrays: wrap them into RayDMatrix objects.
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
                y=y,
                group=None,
                qid=None,
                sample_weight=sample_weight,
                base_margin=base_margin,
                feature_weights=feature_weights,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                base_margin_eval_set=base_margin_eval_set,
                eval_group=None,
                eval_qid=None,
                # changed in xgboost-ray:
                create_dmatrix=lambda **kwargs: RayDMatrix(**{
                    **kwargs,
                    **ray_dmatrix_params
                }),
                **self._ray_get_wrap_evaluation_matrices_compat_kwargs())

        params = self.get_xgb_params()

        if callable(self.objective):
            # A custom objective is wrapped; the params entry is a default.
            custom_obj = _objective_decorator(self.objective)
            params["objective"] = "reg:squarederror"
        else:
            custom_obj = None

        try:
            init_model, feval, params = self._configure_fit(
                xgb_model, eval_metric, params)
        except TypeError:
            # XGBoost >= 1.6.0 also returns early_stopping_rounds/callbacks.
            (init_model, feval, params, early_stopping_rounds,
             callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                              early_stopping_rounds, callbacks)

        # Thread counts are managed per-actor by RayXGBoostActor.
        params.pop("n_jobs", None)
        params.pop("nthread", None)

        ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

        extra_results = {}

        self._Booster = train(
            params,
            train_dmatrix,
            self.get_num_boosting_rounds(),
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=eval_history,
            obj=custom_obj,
            feval=feval,
            verbose_eval=verbose,
            xgb_model=init_model,
            callbacks=callbacks,
            # changed in xgboost-ray:
            additional_results=extra_results,
            ray_params=ray_params,
            _remote=_remote,
        )

        self.additional_results_ = extra_results

        self._set_evaluation_result(eval_history)
        return self