def _fit(self, X_train, y_train, X_val=None, y_val=None, time_limit=None, **kwargs):
    start_time = time.time()

    invalid_params = ['num_threads', 'num_gpus']
    for invalid in invalid_params:
        if invalid in self.params:
            self.params.pop(invalid)
    params = self.params.copy()
    max_category_levels = params.pop('proc.max_category_levels', 100)

    verbosity = kwargs.get('verbosity', 2)
    if verbosity <= 2:
        verbose = False
    elif verbosity == 3:
        verbose = True
        verbose_eval = 50
    else:
        verbose = True
        verbose_eval = 1

    X_train = self.preprocess(X_train, is_train=True, max_category_levels=max_category_levels)
    num_rows_train = X_train.shape[0]

    eval_set = []
    eval_metric = self.get_eval_metric()

    if X_val is None:
        early_stopping_rounds = 150
        eval_set.append((X_train, y_train))  # TODO: if the train dataset is large, use sample of train dataset for validation
    else:
        modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
        early_stopping_rounds = max(round(modifier * 150), 10)
        X_val = self.preprocess(X_val, is_train=False)
        eval_set.append((X_val, y_val))

    try_import_xgboost()
    from .callbacks import print_evaluation, early_stop_custom
    callbacks = []
    if verbose:
        callbacks.append(print_evaluation(verbose_eval))
    # TODO: disable early stopping during refit_full
    callbacks.append(early_stop_custom(early_stopping_rounds, start_time=start_time, time_limit=time_limit, verbose=verbose))

    from xgboost import XGBClassifier, XGBRegressor
    model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
    self.model = model_type(**params)
    self.model.fit(
        X=X_train,
        y=y_train,
        eval_set=eval_set,
        eval_metric=eval_metric,
        verbose=False,
        callbacks=callbacks
    )

    bst = self.model.get_booster()
    self.params_trained['n_estimators'] = bst.best_ntree_limit
    self._best_ntree_limit = bst.best_ntree_limit
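# The two callbacks above follow the legacy (pre-1.3) XGBoost callback protocol,
# where a callback is a plain function invoked once per boosting round with a
# CallbackEnv namedtuple. A minimal sketch of that shape, using only documented
# env fields (iteration, evaluation_result_list); print_evaluation was a real
# helper in old XGBoost, while early_stop_custom is AutoGluon's own and is not
# shown here, so this stand-in is an illustration, not the actual implementation.
def legacy_logging_callback(period=1):  # hypothetical stand-in for print_evaluation
    def callback(env):
        # env.iteration is the current round; env.evaluation_result_list holds
        # (name, score) pairs for every dataset in eval_set.
        if env.evaluation_result_list and env.iteration % period == 0:
            print(env.iteration, env.evaluation_result_list)
    return callback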
def _fit(self,
         X,
         y,
         X_val=None,
         y_val=None,
         time_limit=None,
         num_gpus=0,
         num_cpus=None,
         sample_weight=None,
         sample_weight_val=None,
         verbosity=2,
         **kwargs):
    # TODO: utilize sample_weight_val in early-stopping if provided
    start_time = time.time()
    ag_params = self._get_ag_params()
    params = self._get_model_params()
    if num_cpus:
        params['n_jobs'] = num_cpus
    max_category_levels = params.pop('proc.max_category_levels', 100)

    if verbosity <= 2:
        verbose = False
        log_period = None
    elif verbosity == 3:
        verbose = True
        log_period = 50
    else:
        verbose = True
        log_period = 1

    X = self.preprocess(X, is_train=True, max_category_levels=max_category_levels)
    num_rows_train = X.shape[0]

    eval_set = []
    eval_metric = self.get_eval_metric()

    if X_val is None:
        early_stopping_rounds = None
        eval_set = None
    else:
        X_val = self.preprocess(X_val, is_train=False)
        eval_set.append((X_val, y_val))
        early_stopping_rounds = ag_params.get('ag.early_stop', 'adaptive')
        if isinstance(early_stopping_rounds, (str, tuple, list)):
            early_stopping_rounds = self._get_early_stopping_rounds(num_rows_train=num_rows_train, strategy=early_stopping_rounds)

    if num_gpus != 0:
        params['tree_method'] = 'gpu_hist'
        if 'gpu_id' not in params:
            params['gpu_id'] = 0
    elif 'tree_method' not in params:
        params['tree_method'] = 'hist'

    try_import_xgboost()
    from .callbacks import EarlyStoppingCustom
    from xgboost.callback import EvaluationMonitor
    callbacks = []
    if eval_set is not None:
        if log_period is not None:
            callbacks.append(EvaluationMonitor(period=log_period))
        callbacks.append(EarlyStoppingCustom(early_stopping_rounds, start_time=start_time, time_limit=time_limit, verbose=verbose))

    from xgboost import XGBClassifier, XGBRegressor
    model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
    if 'eval_metric' not in params and params.get('objective') == 'binary:logistic':
        # avoid unnecessary warning from XGBoost v1.3.0
        params['eval_metric'] = 'logloss'
    self.model = model_type(**params)
    self.model.fit(
        X=X,
        y=y,
        eval_set=eval_set,
        eval_metric=eval_metric,
        verbose=False,
        callbacks=callbacks,
        sample_weight=sample_weight
    )

    bst = self.model.get_booster()
    # TODO: Investigate speed-ups from GPU inference
    # bst.set_param({"predictor": "gpu_predictor"})
    self.params_trained['n_estimators'] = bst.best_ntree_limit
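# EarlyStoppingCustom is imported from a sibling .callbacks module that is not
# shown here. A minimal sketch of what such a callback could look like on the
# XGBoost >= 1.3 TrainingCallback API, assuming it simply combines standard
# patience-based stopping with the wall-clock budget passed in above; the class
# name and all internals below are assumptions, not the actual implementation.
import time
from xgboost.callback import EarlyStopping

class EarlyStoppingCustomSketch(EarlyStopping):  # hypothetical name
    def __init__(self, rounds, start_time=None, time_limit=None, verbose=False):
        super().__init__(rounds=rounds, save_best=True)
        self._start_time = start_time
        self._time_limit = time_limit
        self._verbose = verbose

    def after_iteration(self, model, epoch, evals_log):
        # Stop if the patience-based criterion fires...
        if super().after_iteration(model, epoch, evals_log):
            return True
        # ...or if the wall-clock budget is exhausted; returning True halts training.
        if self._time_limit is not None and time.time() - self._start_time > self._time_limit:
            return True
        return False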
def _fit(self,
         X,
         y,
         X_val=None,
         y_val=None,
         time_limit=None,
         num_gpus=0,
         sample_weight=None,
         sample_weight_val=None,
         verbosity=2,
         **kwargs):
    # TODO: utilize sample_weight_val in early-stopping if provided
    start_time = time.time()
    ag_params = self._get_ag_params()
    params = self._get_model_params()
    max_category_levels = params.pop('proc.max_category_levels', 100)

    if verbosity <= 2:
        verbose = False
        verbose_eval = None
    elif verbosity == 3:
        verbose = True
        verbose_eval = 50
    else:
        verbose = True
        verbose_eval = 1

    X = self.preprocess(X, is_train=True, max_category_levels=max_category_levels)
    num_rows_train = X.shape[0]

    eval_set = []
    eval_metric = self.get_eval_metric()

    if X_val is None:
        early_stopping_rounds = None
        eval_set.append((X, y))  # TODO: if the train dataset is large, use sample of train dataset for validation
    else:
        X_val = self.preprocess(X_val, is_train=False)
        eval_set.append((X_val, y_val))
        early_stopping_rounds = ag_params.get('ag.early_stop', 'auto')
        if isinstance(early_stopping_rounds, str):
            early_stopping_rounds = self._get_early_stopping_rounds(num_rows_train=num_rows_train, strategy=early_stopping_rounds)

    if num_gpus != 0:
        params['tree_method'] = 'gpu_hist'
        if 'gpu_id' not in params:
            params['gpu_id'] = 0

    try_import_xgboost()
    from .callbacks import EarlyStoppingCustom
    from xgboost.callback import EvaluationMonitor
    callbacks = []
    if verbose_eval is not None:
        callbacks.append(EvaluationMonitor(period=verbose_eval))
    # TODO: disable early stopping during refit_full
    callbacks.append(EarlyStoppingCustom(early_stopping_rounds, start_time=start_time, time_limit=time_limit, verbose=verbose))

    from xgboost import XGBClassifier, XGBRegressor
    model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
    self.model = model_type(**params)
    self.model.fit(
        X=X,
        y=y,
        eval_set=eval_set,
        eval_metric=eval_metric,
        verbose=False,
        callbacks=callbacks,
        sample_weight=sample_weight
    )

    bst = self.model.get_booster()
    # TODO: Investigate speed-ups from GPU inference
    # bst.set_param({"predictor": "gpu_predictor"})
    self.params_trained['n_estimators'] = bst.best_ntree_limit
    self._best_ntree_limit = bst.best_ntree_limit
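# The 'auto'/'adaptive' strategies above resolve a named policy into a concrete
# round count via self._get_early_stopping_rounds, which is defined on the base
# model class and not shown here. A plausible sketch, assuming the strategy
# scales patience with training-set size the same way the fixed-rounds variants
# in this file do; the function name and body are hypothetical:
def _get_early_stopping_rounds_sketch(num_rows_train, strategy='auto'):
    # Full 150-round patience up to 10k rows, shrinking proportionally beyond
    # that, with a floor of 10 rounds (mirrors the arithmetic in the other variants).
    modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
    return max(round(modifier * 150), 10)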
def _fit(self, X_train, y_train, X_val=None, y_val=None, time_limit=None, num_gpus=0, **kwargs):
    start_time = time.time()
    params = self.params.copy()
    max_category_levels = params.pop('proc.max_category_levels', 100)

    verbosity = kwargs.get('verbosity', 2)
    if verbosity <= 2:
        verbose = False
        verbose_eval = None
    elif verbosity == 3:
        verbose = True
        verbose_eval = 50
    else:
        verbose = True
        verbose_eval = 1

    X_train = self.preprocess(X_train, is_train=True, max_category_levels=max_category_levels)
    num_rows_train = X_train.shape[0]

    eval_set = []
    eval_metric = self.get_eval_metric()

    if X_val is None:
        early_stopping_rounds = 150
        eval_set.append((X_train, y_train))  # TODO: if the train dataset is large, use sample of train dataset for validation
    else:
        modifier = 1 if num_rows_train <= 10000 else 10000 / num_rows_train
        early_stopping_rounds = max(round(modifier * 150), 10)
        X_val = self.preprocess(X_val, is_train=False)
        eval_set.append((X_val, y_val))

    if num_gpus != 0:
        params['tree_method'] = 'gpu_hist'
        if 'gpu_id' not in params:
            params['gpu_id'] = 0

    try_import_xgboost()
    from .callbacks import EarlyStoppingCustom
    from xgboost.callback import EvaluationMonitor
    callbacks = []
    if verbose_eval is not None:
        callbacks.append(EvaluationMonitor(period=verbose_eval))
    # TODO: disable early stopping during refit_full
    callbacks.append(EarlyStoppingCustom(early_stopping_rounds, start_time=start_time, time_limit=time_limit, verbose=verbose))

    from xgboost import XGBClassifier, XGBRegressor
    model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
    self.model = model_type(**params)
    self.model.fit(
        X=X_train,
        y=y_train,
        eval_set=eval_set,
        eval_metric=eval_metric,
        verbose=False,
        callbacks=callbacks
    )

    bst = self.model.get_booster()
    # TODO: Investigate speed-ups from GPU inference
    # bst.set_param({"predictor": "gpu_predictor"})
    self.params_trained['n_estimators'] = bst.best_ntree_limit
    self._best_ntree_limit = bst.best_ntree_limit
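# Portability note: the GPU selection used in these variants (tree_method='gpu_hist'
# plus gpu_id) is the XGBoost 1.x spelling. In XGBoost >= 2.0 the equivalent is:
#     params['tree_method'] = 'hist'
#     params['device'] = 'cuda'   # replaces tree_method='gpu_hist' + gpu_id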