Example #1
    def _fit(self, X_train, y_train, X_val=None, y_val=None, time_limit=None, **kwargs):
        start_time = time.time()
        
        invalid_params = ['num_threads', 'num_gpus']
        for invalid in invalid_params:
            if invalid in self.params:
                self.params.pop(invalid)
        params = self.params.copy()
        max_category_levels = params.pop('proc.max_category_levels', 100)

        verbosity = kwargs.get('verbosity', 2)
        if verbosity <= 2:
            verbose = False
            verbose_eval = None
        elif verbosity == 3:
            verbose = True
            verbose_eval = 50
        else:
            verbose = True
            verbose_eval = 1
        
        X_train = self.preprocess(X_train, is_train=True, max_category_levels=max_category_levels)
        num_rows_train = X_train.shape[0]

        eval_set = []
        eval_metric = self.get_eval_metric()

        if X_val is None:
            early_stopping_rounds = 150
            eval_set.append((X_train, y_train))  # TODO: if the train dataset is large, use sample of train dataset for validation
        else:
            modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
            early_stopping_rounds = max(round(modifier * 150), 10)
            X_val = self.preprocess(X_val, is_train=False)
            eval_set.append((X_val, y_val))

        try_import_xgboost()
        from .callbacks import print_evaluation, early_stop_custom
        callbacks = []
        if verbose:
            callbacks.append(print_evaluation(verbose_eval))
        # TODO: disable early stopping during refit_full
        callbacks.append(early_stop_custom(early_stopping_rounds, start_time=start_time, time_limit=time_limit, verbose=verbose))

        from xgboost import XGBClassifier, XGBRegressor
        model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
        self.model = model_type(**params)
        self.model.fit(
            X=X_train,
            y=y_train,
            eval_set=eval_set,
            eval_metric=eval_metric,
            verbose=False,
            callbacks=callbacks
        )

        bst = self.model.get_booster()
        self.params_trained['n_estimators'] = bst.best_ntree_limit
        self._best_ntree_limit = bst.best_ntree_limit
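
The early-stopping window in this variant scales with training-set size: up to 10,000 rows the full patience of 150 rounds applies; beyond that the window shrinks proportionally, floored at 10 rounds. A minimal standalone sketch of that heuristic (the helper name adaptive_early_stopping_rounds is ours, not AutoGluon's):

    def adaptive_early_stopping_rounds(num_rows_train, base_rounds=150, min_rounds=10):
        # Full patience on small datasets; shrink proportionally on large ones.
        modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
        return max(round(modifier * base_rounds), min_rounds)

    # 1,000 rows -> 150 rounds; 100,000 rows -> 15; 1,000,000 rows -> 10.
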
Example #2
    def _fit(self,
             X,
             y,
             X_val=None,
             y_val=None,
             time_limit=None,
             num_gpus=0,
             num_cpus=None,
             sample_weight=None,
             sample_weight_val=None,
             verbosity=2,
             **kwargs):
        # TODO: utilize sample_weight_val in early-stopping if provided
        start_time = time.time()
        ag_params = self._get_ag_params()
        params = self._get_model_params()
        if num_cpus:
            params['n_jobs'] = num_cpus
        max_category_levels = params.pop('proc.max_category_levels', 100)

        if verbosity <= 2:
            verbose = False
            log_period = None
        elif verbosity == 3:
            verbose = True
            log_period = 50
        else:
            verbose = True
            log_period = 1

        X = self.preprocess(X,
                            is_train=True,
                            max_category_levels=max_category_levels)
        num_rows_train = X.shape[0]

        eval_set = []
        eval_metric = self.get_eval_metric()

        if X_val is None:
            early_stopping_rounds = None
            eval_set = None
        else:
            X_val = self.preprocess(X_val, is_train=False)
            eval_set.append((X_val, y_val))
            early_stopping_rounds = ag_params.get('ag.early_stop', 'adaptive')
            if isinstance(early_stopping_rounds, (str, tuple, list)):
                early_stopping_rounds = self._get_early_stopping_rounds(
                    num_rows_train=num_rows_train,
                    strategy=early_stopping_rounds)

        if num_gpus != 0:
            params['tree_method'] = 'gpu_hist'
            if 'gpu_id' not in params:
                params['gpu_id'] = 0
        elif 'tree_method' not in params:
            params['tree_method'] = 'hist'

        try_import_xgboost()
        from .callbacks import EarlyStoppingCustom
        from xgboost.callback import EvaluationMonitor
        callbacks = []
        if eval_set is not None:
            if log_period is not None:
                callbacks.append(EvaluationMonitor(period=log_period))
            callbacks.append(
                EarlyStoppingCustom(early_stopping_rounds,
                                    start_time=start_time,
                                    time_limit=time_limit,
                                    verbose=verbose))

        from xgboost import XGBClassifier, XGBRegressor
        model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
        if 'eval_metric' not in params and params.get(
                'objective') == 'binary:logistic':
            # avoid unnecessary warning from XGBoost v1.3.0
            params['eval_metric'] = 'logloss'
        self.model = model_type(**params)
        self.model.fit(X=X,
                       y=y,
                       eval_set=eval_set,
                       eval_metric=eval_metric,
                       verbose=False,
                       callbacks=callbacks,
                       sample_weight=sample_weight)

        bst = self.model.get_booster()
        # TODO: Investigate speed-ups from GPU inference
        # bst.set_param({"predictor": "gpu_predictor"})

        self.params_trained['n_estimators'] = bst.best_ntree_limit
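
EarlyStoppingCustom is AutoGluon's time-budget-aware variant of XGBoost's stock early-stopping callback; EvaluationMonitor is the stock per-round logger. Under xgboost 1.3-1.5, where fit() still accepts callbacks and eval_metric, the equivalent wiring with only the built-in callbacks looks roughly like this (data names are placeholders):

    from xgboost import XGBClassifier
    from xgboost.callback import EarlyStopping, EvaluationMonitor

    model = XGBClassifier(n_estimators=10000, tree_method='hist')
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        eval_metric='logloss',
        verbose=False,
        callbacks=[
            EvaluationMonitor(period=50),               # log eval results every 50 rounds
            EarlyStopping(rounds=150, save_best=True),  # stop after 150 stagnant rounds
        ],
    )
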
Example #3
    def _fit(self,
             X,
             y,
             X_val=None,
             y_val=None,
             time_limit=None,
             num_gpus=0,
             sample_weight=None,
             sample_weight_val=None,
             verbosity=2,
             **kwargs):
        # TODO: utilize sample_weight_val in early-stopping if provided
        start_time = time.time()
        ag_params = self._get_ag_params()
        params = self._get_model_params()
        max_category_levels = params.pop('proc.max_category_levels', 100)

        if verbosity <= 2:
            verbose = False
            verbose_eval = None
        elif verbosity == 3:
            verbose = True
            verbose_eval = 50
        else:
            verbose = True
            verbose_eval = 1

        X = self.preprocess(X,
                            is_train=True,
                            max_category_levels=max_category_levels)
        num_rows_train = X.shape[0]

        eval_set = []
        eval_metric = self.get_eval_metric()

        if X_val is None:
            early_stopping_rounds = None
            eval_set.append(
                (X, y)
            )  # TODO: if the train dataset is large, use sample of train dataset for validation
        else:
            X_val = self.preprocess(X_val, is_train=False)
            eval_set.append((X_val, y_val))
            early_stopping_rounds = ag_params.get('ag.early_stop', 'auto')
            if isinstance(early_stopping_rounds, str):
                early_stopping_rounds = self._get_early_stopping_rounds(
                    num_rows_train=num_rows_train,
                    strategy=early_stopping_rounds)

        if num_gpus != 0:
            params['tree_method'] = 'gpu_hist'
            if 'gpu_id' not in params:
                params['gpu_id'] = 0

        try_import_xgboost()
        from .callbacks import EarlyStoppingCustom
        from xgboost.callback import EvaluationMonitor
        callbacks = []
        if verbose_eval is not None:
            callbacks.append(EvaluationMonitor(period=verbose_eval))
        # TODO: disable early stopping during refit_full
        callbacks.append(
            EarlyStoppingCustom(early_stopping_rounds,
                                start_time=start_time,
                                time_limit=time_limit,
                                verbose=verbose))

        from xgboost import XGBClassifier, XGBRegressor
        model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
        self.model = model_type(**params)
        self.model.fit(X=X,
                       y=y,
                       eval_set=eval_set,
                       eval_metric=eval_metric,
                       verbose=False,
                       callbacks=callbacks,
                       sample_weight=sample_weight)

        bst = self.model.get_booster()
        # TODO: Investigate speed-ups from GPU inference
        # bst.set_param({"predictor": "gpu_predictor"})

        self.params_trained['n_estimators'] = bst.best_ntree_limit
        self._best_ntree_limit = bst.best_ntree_limit
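
The best_ntree_limit stored above is the tree count at the early-stopping optimum; it is what truncates the ensemble at inference time. A sketch of applying a saved limit at predict time (clf and X_test are illustrative; ntree_limit is the legacy parameter, deprecated since xgboost 1.4 in favor of iteration_range):

    # Legacy API (xgboost < 1.6): clf is a fitted XGBClassifier.
    y_pred = clf.predict(X_test, ntree_limit=clf.get_booster().best_ntree_limit)

    # Newer API (xgboost >= 1.4): the equivalent half-open iteration range.
    y_pred = clf.predict(X_test, iteration_range=(0, clf.get_booster().best_iteration + 1))
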
Example #4
    def _fit(self,
             X_train,
             y_train,
             X_val=None,
             y_val=None,
             time_limit=None,
             num_gpus=0,
             **kwargs):
        start_time = time.time()

        params = self.params.copy()
        max_category_levels = params.pop('proc.max_category_levels', 100)

        verbosity = kwargs.get('verbosity', 2)
        if verbosity <= 2:
            verbose = False
            verbose_eval = None
        elif verbosity == 3:
            verbose = True
            verbose_eval = 50
        else:
            verbose = True
            verbose_eval = 1

        X_train = self.preprocess(X_train,
                                  is_train=True,
                                  max_category_levels=max_category_levels)
        num_rows_train = X_train.shape[0]

        eval_set = []
        eval_metric = self.get_eval_metric()

        if X_val is None:
            early_stopping_rounds = 150
            eval_set.append(
                (X_train, y_train)
            )  # TODO: if the train dataset is large, use sample of train dataset for validation
        else:
            modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
            early_stopping_rounds = max(round(modifier * 150), 10)
            X_val = self.preprocess(X_val, is_train=False)
            eval_set.append((X_val, y_val))

        if num_gpus != 0:
            params['tree_method'] = 'gpu_hist'
            if 'gpu_id' not in params:
                params['gpu_id'] = 0

        try_import_xgboost()
        from .callbacks import EarlyStoppingCustom
        from xgboost.callback import EvaluationMonitor
        callbacks = []
        if verbose_eval is not None:
            callbacks.append(EvaluationMonitor(period=verbose_eval))
        # TODO: disable early stopping during refit_full
        callbacks.append(
            EarlyStoppingCustom(early_stopping_rounds,
                                start_time=start_time,
                                time_limit=time_limit,
                                verbose=verbose))

        from xgboost import XGBClassifier, XGBRegressor
        model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
        self.model = model_type(**params)
        self.model.fit(X=X_train,
                       y=y_train,
                       eval_set=eval_set,
                       eval_metric=eval_metric,
                       verbose=False,
                       callbacks=callbacks)

        bst = self.model.get_booster()
        # TODO: Investigate speed-ups from GPU inference
        # bst.set_param({"predictor": "gpu_predictor"})

        self.params_trained['n_estimators'] = bst.best_ntree_limit
        self._best_ntree_limit = bst.best_ntree_limit
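
Several of these variants carry the same TODO: when no validation set is given, the full training set is reused as eval_set, which makes per-round evaluation slow on large data. A hedged sketch of the sampling fallback the TODO suggests, assuming pandas inputs (the 10,000-row cap and the helper name are our assumptions, not AutoGluon code):

    import numpy as np

    def sampled_eval_set(X_train, y_train, max_rows=10000, seed=0):
        # Whole train set when small; otherwise a fixed-size random subsample.
        n = X_train.shape[0]
        if n <= max_rows:
            return [(X_train, y_train)]
        rng = np.random.default_rng(seed)
        idx = rng.choice(n, size=max_rows, replace=False)
        return [(X_train.iloc[idx], y_train.iloc[idx])]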