# Constructor of a shadow-tree wrapper around a single XGBoost tree. It assumes
# the enclosing module provides json, typing (List, Mapping, Optional, Union),
# xgboost's Booster, and a utils helper exposing check_tree_index().
def __init__(self, booster: Booster,
             tree_index: int,
             x_data,
             y_data,
             feature_names: Optional[List[str]] = None,
             target_name: Optional[str] = None,
             class_names: Optional[Union[List[str], Mapping[int, str]]] = None):
    if hasattr(booster, 'get_booster'):
        booster = booster.get_booster()  # support XGBClassifier and XGBRegressor
    utils.check_tree_index(tree_index, len(booster.get_dump()))
    self.booster = booster
    self.tree_index = tree_index
    # Flat per-node representation of the chosen tree, plus left/right child
    # lookups derived from it.
    self.tree_to_dataframe = self._get_tree_dataframe()
    self.children_left = self._calculate_children(self.__class__.LEFT_CHILDREN_COLUMN)
    self.children_right = self._calculate_children(self.__class__.RIGHT_CHILDREN_COLUMN)
    self.config = json.loads(self.booster.save_config())
    self.node_to_samples = None  # lazy initialized
    self.features = None  # lazy initialized

    super().__init__(booster, x_data, y_data, feature_names, target_name, class_names)
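
# Usage sketch (assumption): the constructor above matches dtreeviz's
# ShadowXGBDTree; the class name, toy data, and column names below are
# illustrative, not taken from this file.
def _demo_shadow_tree():
    import numpy as np
    import pandas as pd
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(100, 3)), columns=['f0', 'f1', 'f2'])
    y = (X['f0'] + X['f1'] > 0).astype(int)

    model = xgb.XGBClassifier(n_estimators=5, max_depth=3).fit(X, y)
    # Either the sklearn wrapper or model.get_booster() can be passed: the
    # constructor unwraps the wrapper itself via get_booster().
    return ShadowXGBDTree(model, tree_index=0, x_data=X, y_data=y,
                          feature_names=list(X.columns), target_name='y')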
# Internal training loop from xgboost/training.py; it relies on xgboost's own
# internal imports (Booster, STRING_TYPES, CallbackEnv, EarlyStopException,
# and the rabit module for distributed checkpointing).
def _train_internal(params, dtrain,
                    num_boost_round=10, evals=(),
                    obj=None, feval=None,
                    xgb_model=None, callbacks=None):
    """internal training function"""
    callbacks = [] if callbacks is None else callbacks
    evals = list(evals)
    # Expand a list-valued 'eval_metric' into repeated key/value pairs,
    # since Booster expects params as a flat sequence.
    if isinstance(params, dict) \
            and 'eval_metric' in params \
            and isinstance(params['eval_metric'], list):
        params = dict((k, v) for k, v in params.items())
        eval_metrics = params['eval_metric']
        params.pop("eval_metric", None)
        params = list(params.items())
        for eval_metric in eval_metrics:
            params += [('eval_metric', eval_metric)]

    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    nboost = 0
    num_parallel_tree = 1

    if xgb_model is not None:
        # Resume boosting from an existing model (file path or Booster).
        if not isinstance(xgb_model, STRING_TYPES):
            xgb_model = xgb_model.save_raw()
        bst = Booster(params, [dtrain] + [d[0] for d in evals],
                      model_file=xgb_model)
        nboost = len(bst.get_dump())

    _params = dict(params) if isinstance(params, list) else params

    if 'num_parallel_tree' in _params:
        num_parallel_tree = _params['num_parallel_tree']
        nboost //= num_parallel_tree
    if 'num_class' in _params:
        nboost //= _params['num_class']

    # Distributed code: Load the checkpoint from rabit.
    version = bst.load_rabit_checkpoint()
    assert rabit.get_world_size() != 1 or version == 0

    rank = rabit.get_rank()
    start_iteration = int(version / 2)
    nboost += start_iteration

    callbacks_before_iter = [
        cb for cb in callbacks if cb.__dict__.get('before_iteration', False)]
    callbacks_after_iter = [
        cb for cb in callbacks if not cb.__dict__.get('before_iteration', False)]

    for i in range(nboost, num_boost_round):
        for cb in callbacks_before_iter:
            cb(CallbackEnv(model=bst,
                           cvfolds=None,
                           iteration=i,
                           begin_iteration=start_iteration,
                           end_iteration=num_boost_round,
                           rank=rank,
                           evaluation_result_list=None))
        # Distributed code: need to resume to this point.
        # Skip the first update if it is a recovery step.
        if version % 2 == 0:
            bst.update(dtrain, i, obj)
            bst.save_rabit_checkpoint()
            version += 1

        assert rabit.get_world_size() == 1 or version == rabit.version_number()

        nboost += 1
        evaluation_result_list = []
        # check evaluation result.
        if len(evals) != 0:
            bst_eval_set = bst.eval_set(evals, i, feval)
            if isinstance(bst_eval_set, STRING_TYPES):
                msg = bst_eval_set
            else:
                msg = bst_eval_set.decode()
            res = [x.split(':') for x in msg.split()]
            evaluation_result_list = [(k, float(v)) for k, v in res[1:]]
        try:
            for cb in callbacks_after_iter:
                cb(CallbackEnv(model=bst,
                               cvfolds=None,
                               iteration=i,
                               begin_iteration=start_iteration,
                               end_iteration=num_boost_round,
                               rank=rank,
                               evaluation_result_list=evaluation_result_list))
        except EarlyStopException:
            break
        # do checkpoint after evaluation, in case evaluation also updates booster.
        bst.save_rabit_checkpoint()
        version += 1

    if bst.attr('best_score') is not None:
        bst.best_score = float(bst.attr('best_score'))
        bst.best_iteration = int(bst.attr('best_iteration'))
    else:
        bst.best_iteration = nboost - 1
    bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
    return bst
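
# Usage sketch: in the XGBoost codebase _train_internal() is reached through
# the public xgboost.train() wrapper; calling it directly as below is for
# illustration only, and the toy data is an assumption.
def _demo_train_internal():
    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.random((200, 4))
    y = (X[:, 0] > 0.5).astype(int)
    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dvalid = xgb.DMatrix(X[150:], label=y[150:])

    # A list-valued 'eval_metric' exercises the flattening branch above.
    params = {'objective': 'binary:logistic',
              'eval_metric': ['logloss', 'error']}
    bst = _train_internal(params, dtrain, num_boost_round=20,
                          evals=[(dvalid, 'valid')])
    print(bst.best_iteration, bst.best_ntree_limit)
    return bst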