def evals_result(self):
    """Return the evaluation results.

    If eval_set is passed to the `fit` function, you can call evals_result()
    to get evaluation results for all passed eval_sets. When eval_metric is
    also passed to the `fit` function, the evals_result will contain the
    eval_metrics passed to the `fit` function.

    Returns
    -------
    evals_result : dictionary

    Raises
    ------
    XGBoostError
        If no evaluation results are stored on the model, i.e. the
        ``evals_result_`` attribute is missing or empty.

    Example
    -------
    param_dist = {'objective':'binary:logistic', 'n_estimators':2}

    clf = xgb.XGBClassifier(**param_dist)

    clf.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            eval_metric='logloss',
            verbose=True)

    evals_result = clf.evals_result()

    The variable evals_result will contain:
    {'validation_0': {'logloss': ['0.604835', '0.531479']},
     'validation_1': {'logloss': ['0.41965', '0.17686']}}
    """
    # The constructor does not create ``evals_result_``, so a plain attribute
    # access would surface as AttributeError instead of the intended
    # XGBoostError when no results have been recorded yet.
    evals_result = getattr(self, 'evals_result_', None)
    if not evals_result:
        raise XGBoostError('No results.')
    return evals_result
def booster(self):
    """Return the xgboost Booster that backs this model.

    Returns
    -------
    booster : the Booster stored in ``self._Booster``

    Raises
    ------
    XGBoostError
        If ``self._Booster`` is still ``None``, i.e. `fit` has not been
        called yet.
    """
    fitted_booster = self._Booster
    if fitted_booster is not None:
        return fitted_booster
    raise XGBoostError('need to call fit beforehand')
def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100,
             silent=True, objective="reg:linear", nthread=-1, gamma=0,
             min_child_weight=1, max_delta_step=0, subsample=1,
             colsample_bytree=1, colsample_bylevel=1, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, base_score=0.5, seed=0,
             missing=None, num_boost_round=1):
    """Store the model hyperparameters on the instance.

    Every keyword argument is saved unchanged under an attribute of the
    same name, with one exception: ``missing`` is replaced by ``np.nan``
    when it is ``None``. The underlying booster (``self._Booster``) starts
    out as ``None``.

    Raises
    ------
    XGBoostError
        If ``SKLEARN_INSTALLED`` is falsy.
    """
    if not SKLEARN_INSTALLED:
        raise XGBoostError(
            'sklearn needs to be installed in order to use this module')

    # Tree / boosting configuration.
    self.max_depth = max_depth
    self.learning_rate = learning_rate
    self.n_estimators = n_estimators
    self.silent = silent
    self.objective = objective

    self.nthread = nthread
    self.gamma = gamma
    self.min_child_weight = min_child_weight
    self.max_delta_step = max_delta_step
    self.subsample = subsample
    self.colsample_bytree = colsample_bytree
    self.colsample_bylevel = colsample_bylevel
    self.reg_alpha = reg_alpha
    self.reg_lambda = reg_lambda
    self.scale_pos_weight = scale_pos_weight

    self.base_score = base_score
    self.seed = seed

    # An unspecified missing-value marker falls back to NaN.
    if missing is None:
        missing = np.nan
    self.missing = missing

    self.num_boost_round = num_boost_round

    # No booster exists until one is assigned later.
    self._Booster = None
def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False,
       folds=None, metrics=(), obj=None, feval=None, maximize=False,
       early_stopping_rounds=None, fpreproc=None, as_pandas=True,
       verbose_eval=None, show_stdv=True, seed=0, callbacks=None,
       shuffle=True):  # pylint: disable = invalid-name
    """Cross-validation with given parameters.

    Parameters
    ----------
    params : dict or list of (key, value) pairs
        Booster params. The input is copied; the caller's object is not
        modified. Any 'eval_metric' entry is removed from the copy and used
        as `metrics` when `metrics` is empty.
    dtrain : DMatrix
        Data to be trained.
    num_boost_round : int
        Number of boosting iterations.
    nfold : int
        Number of folds in CV.
    stratified : bool
        Perform stratified sampling.
    folds : a KFold or StratifiedKFold instance
        Sklearn KFolds or StratifiedKFolds.
    metrics : string or list of strings
        Evaluation metrics to be watched in CV.
    obj : function
        Custom objective function.
    feval : function
        Custom evaluation function.
    maximize : bool
        Whether to maximize feval.
    early_stopping_rounds: int
        Activates early stopping. CV error needs to decrease at least
        every <early_stopping_rounds> round(s) to continue.
        Last entry in evaluation history is the one from best iteration.
    fpreproc : function
        Preprocessing function that takes (dtrain, dtest, param) and returns
        transformed versions of those.
    as_pandas : bool, default True
        Return the history as a pd.DataFrame when pandas is installed.
        If False or pandas is not installed, the history is returned as a
        dict of lists.
    verbose_eval : bool, int, or None, default None
        Whether to display the progress. If None, no progress callback is
        added. If True, progress will be displayed at boosting stage. If an
        integer is given, progress will be displayed at every given
        `verbose_eval` boosting stage.
    show_stdv : bool, default True
        Whether to display the standard deviation in progress.
        Results are not affected, and always contains std.
    seed : int
        Seed used to generate the folds (passed to numpy.random.seed).
    callbacks : list of callback functions
        List of callback functions that are applied at end of each iteration.
        It is possible to use predefined callbacks by using xgb.callback
        module.
        Example: [xgb.callback.reset_learning_rate(custom_rates)]
        The sequence is copied, so the caller's list is never modified.
    shuffle : bool
        Shuffle data before creating folds.

    Returns
    -------
    (results, cvfolds) : tuple
        ``results`` is the evaluation history: for every key reported by the
        aggregated fold evaluation there is a ``'<key>-mean'`` and a
        ``'<key>-std'`` series with one entry per completed round (a
        pd.DataFrame, or a dict of lists, see `as_pandas`).
        ``cvfolds`` is the collection of per-fold objects that were trained.

    Raises
    ------
    XGBoostError
        If `stratified` is True but sklearn is not installed.
    """
    if stratified is True and not SKLEARN_INSTALLED:
        raise XGBoostError(
            'sklearn needs to be installed in order to use stratified cv')

    if isinstance(metrics, str):
        metrics = [metrics]

    # Work on a private copy of the parameters. A list of pairs may repeat
    # 'eval_metric'; collect all of them before dict() collapses duplicates.
    if isinstance(params, list):
        _metrics = [x[1] for x in params if x[0] == 'eval_metric']
        params = dict(params)
        if 'eval_metric' in params:
            params['eval_metric'] = _metrics
    else:
        params = dict(params.items())

    # Explicit `metrics` win; otherwise fall back to params['eval_metric'].
    if len(metrics) == 0 and 'eval_metric' in params:
        if isinstance(params['eval_metric'], list):
            metrics = params['eval_metric']
        else:
            metrics = [params['eval_metric']]

    params.pop("eval_metric", None)

    results = {}
    cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc,
                      stratified, folds, shuffle)

    # setup callbacks
    # Copy the caller's sequence: appending to it directly would leak the
    # early-stop / print callbacks into the caller's list and accumulate them
    # across repeated calls.
    callbacks = [] if callbacks is None else list(callbacks)
    if early_stopping_rounds is not None:
        callbacks.append(
            callback.early_stop(early_stopping_rounds,
                                maximize=maximize,
                                verbose=False))

    if isinstance(verbose_eval, bool) and verbose_eval:
        callbacks.append(callback.print_evaluation(show_stdv=show_stdv))
    elif isinstance(verbose_eval, int):
        # NOTE(review): False is also an int, so it reaches this branch and
        # is forwarded as the period; presumably print_evaluation treats
        # that as "silent" -- confirm against the callback module.
        callbacks.append(
            callback.print_evaluation(verbose_eval, show_stdv=show_stdv))

    # Callbacks flagged with `before_iteration` run before the update step,
    # all others after the round has been evaluated.
    callbacks_before_iter = [
        cb for cb in callbacks
        if cb.__dict__.get('before_iteration', False)
    ]
    callbacks_after_iter = [
        cb for cb in callbacks
        if not cb.__dict__.get('before_iteration', False)
    ]

    for i in range(num_boost_round):
        for cb in callbacks_before_iter:
            cb(CallbackEnv(model=None,
                           cvfolds=cvfolds,
                           iteration=i,
                           begin_iteration=0,
                           end_iteration=num_boost_round,
                           rank=0,
                           evaluation_result_list=None))

        for fold in cvfolds:
            fold.update(i, obj)
        res = aggcv([f.eval(i, feval) for f in cvfolds])

        for key, mean, std in res:
            # Create both series first so the key order matches insertion of
            # '<key>-mean' followed by '<key>-std'.
            mean_series = results.setdefault(key + '-mean', [])
            std_series = results.setdefault(key + '-std', [])
            mean_series.append(mean)
            std_series.append(std)

        try:
            for cb in callbacks_after_iter:
                cb(CallbackEnv(model=None,
                               cvfolds=cvfolds,
                               iteration=i,
                               begin_iteration=0,
                               end_iteration=num_boost_round,
                               rank=0,
                               evaluation_result_list=res))
        except EarlyStopException as e:
            # Drop everything recorded after the best iteration.
            for k in results:
                results[k] = results[k][:(e.best_iteration + 1)]
            break

    if as_pandas:
        try:
            import pandas as pd
        except ImportError:
            # pandas is optional: fall back to the plain dict.
            pass
        else:
            results = pd.DataFrame.from_dict(results)

    return (results, cvfolds)