def callback(env): """internal function""" score_train = env.evaluation_result_list[0][1] score = env.evaluation_result_list[1][1] if not state: init(env) best_score = state['best_score'] best_iteration = state['best_iteration'] maximize_score = state['maximize_score'] if (maximize_score and score > best_score and ((not symetric_overfit and score_train - score <= max_overfit) or (symetric_overfit and abs(score_train - score) <= max_overfit))) or \ (not maximize_score and score < best_score and ((not symetric_overfit and score - score_train <= max_overfit) or (symetric_overfit and abs(score - score_train) <= max_overfit))): msg = '[%d]\t%s' % (env.iteration, '\t'.join( [_fmt_metric(x) for x in env.evaluation_result_list])) state['best_msg'] = msg state['best_score'] = score state['best_score_train'] = score_train state['best_iteration'] = env.iteration # save the property to attributes, so they will occur in checkpoint. if env.model is not None: env.model.set_attr(best_score=str(state['best_score']), best_score_train=str( state['best_score_train']), best_iteration=str(state['best_iteration']), best_msg=state['best_msg']) elif env.iteration - best_iteration >= stopping_rounds: best_msg = state['best_msg'] if verbose and env.rank == 0: msg = "Stopping. Best iteration:\n{}\n\n" rabit.tracker_print(msg.format(best_msg)) raise EarlyStopException(best_iteration)
def callback(env): """internal function""" if not state: init(env) bst = env.model i = env.iteration cvfolds = env.cvfolds res_dict = {} ##### evaluation ##### if cvfolds is not None: for feval in fevals: tmp = aggcv([f.eval(i, feval) for f in cvfolds]) for k, mean, std in tmp: res_dict[k] = [mean, std] else: for feval in fevals: bst_eval = bst.eval_set(evals, i, feval) res = [x.split(":") for x in bst_eval.split()] for kv in res[1:]: res_dict[kv[0]] = [float(kv[1])] eval_res = [] keys = list(res_dict.keys()) keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x) for key in keys: v = res_dict[key] eval_res.append([key] + v) ##### print eval result ##### infos = ["XGB iter: %3d" % i] for item in eval_res: if "null" in item[0]: continue infos.append("%s: %.6f" % (item[0], item[1])) if not isinstance(verbose_eval, bool) and verbose_eval and i % verbose_eval == 0: logger.debug("\t".join(infos)) if log_file: with open(log_file, "a") as fout: fout.write("\t".join(infos) + "\n") ##### choose score and do early stopping ##### score = None for item in eval_res: if item[0] == metric: score = item[1] break assert score is not None best_score = state["best_score"] best_iteration = state["best_iteration"] maximize_score = state["maximize_score"] if (maximize_score and score > best_score) or (not maximize_score and score < best_score): msg = "[%d] %s" % (env.iteration, "\t".join( [_fmt_metric(x) for x in eval_res])) state["best_msg"] = msg state["best_score"] = score state["best_iteration"] = env.iteration # save the property to attributes, so they will occur in checkpoint. if env.model is not None: env.model.set_attr( best_score=str(state["best_score"]), best_iteration=str(state["best_iteration"]), best_msg=state["best_msg"], ) elif env.iteration - best_iteration >= stopping_rounds: best_msg = state["best_msg"] if verbose_eval and env.rank == 0: logger.debug("XGB stopped. Best iteration: %s ", best_msg) raise EarlyStopException(best_iteration)
def callback(env): """internal function""" if not best_state: init(env) bst = env.model i = env.iteration cvfolds = env.cvfolds res_dict = {} ##### evaluation ##### if cvfolds is not None: for feval in fevals: tmp = aggcv([f.eval(i, feval) for f in cvfolds]) for k, mean, std in tmp: res_dict[k] = [mean, std] else: for feval in fevals: bst_eval = bst.eval_set(evals, i, feval) res = [x.split(':') for x in bst_eval.split()] for kv in res[1:]: res_dict[kv[0]] = [float(kv[1])] eval_res = [] keys = list(res_dict.keys()) keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x) for key in keys: v = res_dict[key] eval_res.append([key] + v) ##### print eval result ##### infos = ["XGB iter: %3d" % i] for item in eval_res: if 'null' in item[0]: continue infos.append("%s: %.6f" % (item[0], item[1])) if not isinstance(verbose_eval, bool) and verbose_eval and i % verbose_eval == 0: logger.debug("\t".join(infos)) if log_file: with open(log_file, "a") as fout: fout.write("\t".join(infos) + '\n') ##### choose score and do early stopping ##### score = None for item in eval_res: if item[0] == metric: score = item[1] break best_score = best_state['score'] best_iteration = best_state['iteration'] if score and score > best_score: msg = '[%d] %s' % (env.iteration, '\t'.join([_fmt_metric(x) for x in eval_res])) if bst is not None: bst.set_attr(best_score=str(score), best_iteration=str(env.iteration), best_msg=msg) best_state['msg'] = msg best_state['score'] = score best_state['iteration'] = env.iteration elif env.iteration - best_iteration >= stopping_rounds: best_msg = best_state['msg'] if verbose_eval and env.rank == 0: logger.debug("XGB stopped. Best iteration: %s ", best_msg) raise EarlyStopException(best_iteration)
def callback(env): """internal function""" if not state: init(env) score = env.evaluation_result_list[-1][1] best_score = state['best_score'] best_iteration = state['best_iteration'] maximize_score = state['maximize_score'] if (maximize_score and score > best_score) or \ (not maximize_score and score < best_score): msg = '[%d]\t%s' % (env.iteration, '\t'.join( [_fmt_metric(x) for x in env.evaluation_result_list])) state['best_msg'] = msg state['best_score'] = score state['best_iteration'] = env.iteration # save the property to attributes, so they will occur in checkpoint. if env.model is not None: env.model.set_attr(best_score=str(state['best_score']), best_iteration=str(state['best_iteration']), best_msg=state['best_msg']) elif env.iteration - best_iteration >= stopping_rounds: best_msg = state['best_msg'] if verbose and env.rank == 0: msg = "Stopping. Best iteration: {}" logger.log(15, msg.format(best_msg)) raise EarlyStopException(best_iteration) if env.iteration % 10 == 0: available = psutil.virtual_memory().available cur_rss = mem_status.memory_info().rss if cur_rss < state['init_mem_rss']: state['init_mem_rss'] = cur_rss estimated_model_size_mb = (cur_rss - state['init_mem_rss']) >> 20 available_mb = available >> 20 model_size_memory_ratio = estimated_model_size_mb / available_mb if verbose and (model_size_memory_ratio > 0.25): logger.debug(15, f'Available Memory: {available_mb} MB') logger.debug( 15, f'Estimated XGB model size: {estimated_model_size_mb} MB') early_stop = False if (model_size_memory_ratio > 1.0) or (available_mb < 512): logger.warning( 'Warning: Large XGB model size may cause OOM error if training continues' ) logger.warning(f'Available Memory: {available_mb} MB') logger.warning( f'Estimated XGB model size: {estimated_model_size_mb} MB') early_stop = True if early_stop: if verbose and env.rank == 0: logger.warning( f'Warning: Early stopped GBM model prior to optimal result to avoid OOM error. Please increase available memory to avoid subpar model quality.\n' ) logger.warning( f'Early stopping. best iteration is: [{best_iteration}]\t{best_score}' ) raise EarlyStopException(best_iteration) if time_limit: time_elapsed = time.time() - start_time time_left = time_limit - time_elapsed if time_left <= 0: if verbose and env.rank == 0: logger.log( 20, f"Ran out of time, early stopping on iteration {env.iteration}. Best iteration is: \t[{best_iteration}]\t{best_score}" ) logger.log(20, state['best_msg']) raise EarlyStopException(best_iteration)
def callback(env: "xgb.core.CallbackEnv"): # pylint:disable = import-outside-toplevel import xgboost as xgb from xgboost.callback import _fmt_metric # type: ignore from xgboost.core import EarlyStopException # type: ignore try: from xgboost.training import aggcv # type: ignore except ImportError: from xgboost.callback import _aggcv as aggcv # type: ignore # pylint:enable = import-outside-toplevel if not state: init(env) booster: xgb.Booster = env.model iteration: int = env.iteration cvfolds: List[xgb.training.CVPack] = env.cvfolds ##### Evaluation ##### # `eval_result` is a list of (key, score) eval_result: List[Tuple[str, float]] = [] if cvfolds is None: eval_result = list( itertools_chain.from_iterable([(key, float(value)) for key, value in map( lambda x: x.split(":"), booster.eval_set( evals=evals, iteration=iteration, feval=feval, ).split()[1:], )] for feval in fevals)) else: eval_result = list( itertools_chain.from_iterable([(key, score) for key, score, _std in aggcv( fold.eval( iteration=iteration, feval=feval, ) for fold in cvfolds)] for feval in fevals)) eval_result = list(eval_result) eval_result.sort(key=sort_key) ##### Print eval result ##### if verbose_eval and iteration % verbose_eval == 0: info = [] for key, score in eval_result: if "null" not in key: info.append(f"{key}: {score:.6f}") logger.debug("XGB iter %3d: %s", iteration, "\t".join(info)) ##### Choose score and do early stopping ##### score = None for key, _score in eval_result: if key == focused_metric: score = _score break assert score is not None best_score = state["best_score"] best_iteration = state["best_iteration"] if score < best_score: tab = "\t" # to work with f-string msg = f"[{env.iteration}] {tab.join([_fmt_metric(x) for x in eval_result])}" state["best_msg"] = msg state["best_score"] = score state["best_iteration"] = env.iteration # save the property to attributes, so they will occur in checkpoint. if env.model is not None: env.model.set_attr( best_score=str(state["best_score"]), best_iteration=str(state["best_iteration"]), best_msg=state["best_msg"], ) elif env.iteration - best_iteration >= early_stopping_rounds: best_msg = state["best_msg"] if verbose_eval and env.rank == 0: logger.debug("XGB stopped. Best iteration: %s ", best_msg) raise EarlyStopException(best_iteration)