Code example #1
0
 def callback(env):
     """Early-stopping callback that also guards against overfitting.

     Tracks the best validation score seen so far, but a new best is only
     accepted when the train/validation gap stays within ``max_overfit``
     (one-sided by default, or symmetric when ``symetric_overfit`` is set).
     Raises ``EarlyStopException`` once ``stopping_rounds`` iterations pass
     without an accepted improvement.
     """
     # evaluation_result_list holds (name, score) pairs; index 0 is taken
     # as the train set and index 1 as the validation set -- presumably
     # matching the watchlist order used by the caller; TODO confirm.
     score_train = env.evaluation_result_list[0][1]
     score = env.evaluation_result_list[1][1]
     # Lazily initialize the shared closure state on the first call.
     if not state:
         init(env)
     best_score = state['best_score']
     best_iteration = state['best_iteration']
     maximize_score = state['maximize_score']
     # Accept a new best only if (a) the score improved in the configured
     # direction AND (b) the train/validation gap is within max_overfit
     # (absolute gap when symetric_overfit, signed gap otherwise).
     if (maximize_score and score > best_score and ((not symetric_overfit and score_train - score <= max_overfit) or
                                                    (symetric_overfit and abs(score_train - score) <= max_overfit))) or \
             (not maximize_score and score < best_score and ((not symetric_overfit and score - score_train <= max_overfit) or
                                                             (symetric_overfit and abs(score - score_train) <= max_overfit))):
         msg = '[%d]\t%s' % (env.iteration, '\t'.join(
             [_fmt_metric(x) for x in env.evaluation_result_list]))
         state['best_msg'] = msg
         state['best_score'] = score
         state['best_score_train'] = score_train
         state['best_iteration'] = env.iteration
         # save the property to attributes, so they will occur in checkpoint.
         if env.model is not None:
             env.model.set_attr(best_score=str(state['best_score']),
                                best_score_train=str(
                                    state['best_score_train']),
                                best_iteration=str(state['best_iteration']),
                                best_msg=state['best_msg'])
     # No accepted improvement for stopping_rounds iterations: stop.
     elif env.iteration - best_iteration >= stopping_rounds:
         best_msg = state['best_msg']
         if verbose and env.rank == 0:
             msg = "Stopping. Best iteration:\n{}\n\n"
             rabit.tracker_print(msg.format(best_msg))
         raise EarlyStopException(best_iteration)
Code example #2
0
    def callback(env):
        """Per-iteration hook: evaluate, log, and early-stop training.

        Collects metric values (fold-aggregated means/stds when running
        cross-validation), optionally logs them, then updates the best
        score kept in the shared ``state`` dict.  Raises
        ``EarlyStopException`` after ``stopping_rounds`` iterations
        without improvement.
        """
        if not state:
            init(env)

        booster = env.model
        iteration = env.iteration
        folds = env.cvfolds

        results = {}

        ##### evaluation #####
        if folds is not None:
            # Cross-validation: aggcv reduces per-fold output to (key, mean, std).
            for feval in fevals:
                for name, mean, std in aggcv([fold.eval(iteration, feval) for fold in folds]):
                    results[name] = [mean, std]
        else:
            # Plain training: parse "name:value" tokens; the first token
            # is the iteration tag, not a metric.
            for feval in fevals:
                raw = booster.eval_set(evals, iteration, feval)
                for token in raw.split()[1:]:
                    parts = token.split(":")
                    results[parts[0]] = [float(parts[1])]

        # Names containing metric_shortname sort first (the "a" prefix
        # pushes them ahead of everything else).
        ordered = sorted(results, key=lambda name: "a" + name if metric_shortname in name else name)
        eval_res = [[name] + results[name] for name in ordered]

        ##### print eval result #####
        infos = ["XGB iter: %3d" % iteration]
        for entry in eval_res:
            if "null" not in entry[0]:
                infos.append("%s: %.6f" % (entry[0], entry[1]))

        if not isinstance(verbose_eval, bool) and verbose_eval and iteration % verbose_eval == 0:
            logger.debug("\t".join(infos))
        if log_file:
            with open(log_file, "a") as fout:
                fout.write("\t".join(infos) + "\n")

        ##### choose score and do early stopping #####
        score = None
        for entry in eval_res:
            if entry[0] == metric:
                score = entry[1]
                break
        assert score is not None

        best_score = state["best_score"]
        best_iteration = state["best_iteration"]
        maximize_score = state["maximize_score"]
        improved = score > best_score if maximize_score else score < best_score
        if improved:
            msg = "[%d] %s" % (env.iteration, "\t".join(
                [_fmt_metric(x) for x in eval_res]))
            state["best_msg"] = msg
            state["best_score"] = score
            state["best_iteration"] = env.iteration
            # save the property to attributes, so they will occur in checkpoint.
            if env.model is not None:
                env.model.set_attr(
                    best_score=str(state["best_score"]),
                    best_iteration=str(state["best_iteration"]),
                    best_msg=state["best_msg"],
                )
        elif env.iteration - best_iteration >= stopping_rounds:
            best_msg = state["best_msg"]
            if verbose_eval and env.rank == 0:
                logger.debug("XGB stopped. Best iteration: %s ", best_msg)
            raise EarlyStopException(best_iteration)
Code example #3
0
File: xgb_cost_model.py — Project: zhuyawen/akg
    def callback(env):
        """Evaluation / early-stopping callback (higher score is better).

        Aggregates metric values (per-fold means when cross-validating),
        optionally logs them, and tracks the best score seen so far in the
        shared ``best_state`` dict.  Raises ``EarlyStopException`` once
        ``stopping_rounds`` iterations pass without improvement.
        """
        # Lazily initialize the shared closure state on the first call.
        if not best_state:
            init(env)

        bst = env.model
        i = env.iteration
        cvfolds = env.cvfolds

        res_dict = {}

        ##### evaluation #####
        if cvfolds is not None:
            # Cross-validation: aggcv yields (key, mean, std) per metric.
            for feval in fevals:
                tmp = aggcv([f.eval(i, feval) for f in cvfolds])
                for k, mean, std in tmp:
                    res_dict[k] = [mean, std]
        else:
            # Plain training: parse "name:value" tokens; the first token
            # is the iteration tag, not a metric.
            for feval in fevals:
                bst_eval = bst.eval_set(evals, i, feval)
                res = [x.split(':') for x in bst_eval.split()]
                for kv in res[1:]:
                    res_dict[kv[0]] = [float(kv[1])]

        eval_res = []
        keys = list(res_dict.keys())
        # Names containing metric_shortname sort first ("a" prefix).
        keys.sort(key=lambda x: x if metric_shortname not in x else "a" + x)
        for key in keys:
            v = res_dict[key]
            eval_res.append([key] + v)

        ##### print eval result #####
        infos = ["XGB iter: %3d" % i]
        for item in eval_res:
            if 'null' in item[0]:
                continue
            infos.append("%s: %.6f" % (item[0], item[1]))

        if not isinstance(verbose_eval, bool) and verbose_eval and i % verbose_eval == 0:
            logger.debug("\t".join(infos))
        if log_file:
            with open(log_file, "a") as fout:
                fout.write("\t".join(infos) + '\n')

        ##### choose score and do early stopping #####
        score = None
        for item in eval_res:
            if item[0] == metric:
                score = item[1]
                break

        best_score = best_state['score']
        best_iteration = best_state['iteration']
        # BUG FIX: the original test `if score and score > best_score:`
        # treated a legitimate score of 0.0 as "no score", so a best value
        # of 0.0 could never be recorded.  Only a missing metric
        # (score is None) should be skipped.
        if score is not None and score > best_score:
            msg = '[%d] %s' % (env.iteration, '\t'.join([_fmt_metric(x) for x in eval_res]))
            # save the property to attributes, so they will occur in checkpoint.
            if bst is not None:
                bst.set_attr(best_score=str(score), best_iteration=str(env.iteration), best_msg=msg)
            best_state['msg'] = msg
            best_state['score'] = score
            best_state['iteration'] = env.iteration
        elif env.iteration - best_iteration >= stopping_rounds:
            best_msg = best_state['msg']
            if verbose_eval and env.rank == 0:
                logger.debug("XGB stopped. Best iteration: %s ", best_msg)
            raise EarlyStopException(best_iteration)
Code example #4
0
    def callback(env):
        """Early-stopping callback with memory- and time-budget guards.

        Besides standard score-based early stopping, every 10th iteration
        it estimates the model's memory footprint from the process RSS
        (via psutil) and stops early when an OOM looks likely; it also
        stops when ``time_limit`` seconds have elapsed since ``start_time``.
        """
        if not state:
            init(env)
        # The watched metric is the last entry of evaluation_result_list.
        score = env.evaluation_result_list[-1][1]
        best_score = state['best_score']
        best_iteration = state['best_iteration']
        maximize_score = state['maximize_score']

        if (maximize_score and score > best_score) or \
                (not maximize_score and score < best_score):
            msg = '[%d]\t%s' % (env.iteration, '\t'.join(
                [_fmt_metric(x) for x in env.evaluation_result_list]))
            state['best_msg'] = msg
            state['best_score'] = score
            state['best_iteration'] = env.iteration
            # save the property to attributes, so they will occur in checkpoint.
            if env.model is not None:
                env.model.set_attr(best_score=str(state['best_score']),
                                   best_iteration=str(state['best_iteration']),
                                   best_msg=state['best_msg'])
        elif env.iteration - best_iteration >= stopping_rounds:
            best_msg = state['best_msg']
            if verbose and env.rank == 0:
                msg = "Stopping. Best iteration: {}"
                logger.log(15, msg.format(best_msg))
            raise EarlyStopException(best_iteration)

        # Every 10th iteration: estimate model-size growth vs. free RAM.
        if env.iteration % 10 == 0:
            available = psutil.virtual_memory().available
            cur_rss = mem_status.memory_info().rss
            # Track the smallest RSS observed as the pre-training baseline.
            if cur_rss < state['init_mem_rss']:
                state['init_mem_rss'] = cur_rss
            estimated_model_size_mb = (cur_rss - state['init_mem_rss']) >> 20
            available_mb = available >> 20

            # max(..., 1) guards against ZeroDivisionError when less than
            # 1 MB of memory is reported available.
            model_size_memory_ratio = estimated_model_size_mb / max(available_mb, 1)
            if verbose and (model_size_memory_ratio > 0.25):
                # BUG FIX: logger.debug(15, ...) passed the log level as the
                # message, so only "15" was logged; use logger.log(15, ...)
                # as done elsewhere in this callback.
                logger.log(15, f'Available Memory: {available_mb} MB')
                logger.log(15, f'Estimated XGB model size: {estimated_model_size_mb} MB')

            early_stop = False
            if (model_size_memory_ratio > 1.0) or (available_mb < 512):
                logger.warning(
                    'Warning: Large XGB model size may cause OOM error if training continues'
                )
                logger.warning(f'Available Memory: {available_mb} MB')
                logger.warning(
                    f'Estimated XGB model size: {estimated_model_size_mb} MB')
                early_stop = True

            if early_stop:
                if verbose and env.rank == 0:
                    logger.warning(
                        f'Warning: Early stopped GBM model prior to optimal result to avoid OOM error. Please increase available memory to avoid subpar model quality.\n'
                    )
                    logger.warning(
                        f'Early stopping. best iteration is: [{best_iteration}]\t{best_score}'
                    )
                raise EarlyStopException(best_iteration)

        # Stop when the wall-clock budget is exhausted.
        if time_limit:
            time_elapsed = time.time() - start_time
            time_left = time_limit - time_elapsed
            if time_left <= 0:
                if verbose and env.rank == 0:
                    logger.log(
                        20,
                        f"Ran out of time, early stopping on iteration {env.iteration}. Best iteration is: \t[{best_iteration}]\t{best_score}"
                    )
                    logger.log(20, state['best_msg'])
                raise EarlyStopException(best_iteration)
Code example #5
0
File: xgb_model.py — Project: wenxcs/tvm
    def callback(env: "xgb.core.CallbackEnv"):
        """Evaluation / early-stopping callback (smaller score is better).

        Collects per-metric results (fold-aggregated when cross-validating),
        optionally logs them, and tracks the best (lowest) ``focused_metric``
        value in the shared ``state`` dict.  Raises ``EarlyStopException``
        after ``early_stopping_rounds`` iterations without improvement.
        """
        # pylint:disable = import-outside-toplevel
        import xgboost as xgb
        from xgboost.callback import _fmt_metric  # type: ignore
        from xgboost.core import EarlyStopException  # type: ignore

        try:
            from xgboost.training import aggcv  # type: ignore
        except ImportError:
            # aggcv moved between xgboost versions.
            from xgboost.callback import _aggcv as aggcv  # type: ignore
        # pylint:enable = import-outside-toplevel

        if not state:
            init(env)
        booster: xgb.Booster = env.model
        iteration: int = env.iteration
        cvfolds: List[xgb.training.CVPack] = env.cvfolds
        ##### Evaluation #####
        # `eval_result` is a list of (key, score) pairs.
        eval_result: List[Tuple[str, float]] = []
        if cvfolds is None:
            # Plain training: parse the "key:value" tokens emitted by
            # Booster.eval_set; the first token is the iteration tag.
            for feval in fevals:
                raw = booster.eval_set(evals=evals, iteration=iteration, feval=feval)
                for token in raw.split()[1:]:
                    key, value = token.split(":")
                    eval_result.append((key, float(value)))
        else:
            # Cross-validation: aggcv aggregates per-fold results into
            # (key, mean, std); the std is dropped here.
            for feval in fevals:
                for key, mean, _std in aggcv(
                        fold.eval(iteration=iteration, feval=feval) for fold in cvfolds):
                    eval_result.append((key, mean))
        # (Removed a redundant `eval_result = list(eval_result)` -- it is
        # already a list at this point.)
        eval_result.sort(key=sort_key)

        ##### Print eval result #####
        if verbose_eval and iteration % verbose_eval == 0:
            info = []
            for key, score in eval_result:
                if "null" not in key:
                    info.append(f"{key}: {score:.6f}")
            logger.debug("XGB iter %3d: %s", iteration, "\t".join(info))

        ##### Choose score and do early stopping #####
        score = None
        for key, _score in eval_result:
            if key == focused_metric:
                score = _score
                break
        # NOTE: assert is stripped under `python -O`; a missing metric then
        # fails later with a TypeError on the comparison below.
        assert score is not None

        best_score = state["best_score"]
        best_iteration = state["best_iteration"]
        # This callback minimizes: a strictly smaller score is a new best.
        if score < best_score:
            tab = "\t"  # to work with f-string
            msg = f"[{env.iteration}] {tab.join([_fmt_metric(x) for x in eval_result])}"
            state["best_msg"] = msg
            state["best_score"] = score
            state["best_iteration"] = env.iteration
            # save the property to attributes, so they will occur in checkpoint.
            if env.model is not None:
                env.model.set_attr(
                    best_score=str(state["best_score"]),
                    best_iteration=str(state["best_iteration"]),
                    best_msg=state["best_msg"],
                )
        elif env.iteration - best_iteration >= early_stopping_rounds:
            best_msg = state["best_msg"]
            if verbose_eval and env.rank == 0:
                logger.debug("XGB stopped. Best iteration: %s ", best_msg)
            raise EarlyStopException(best_iteration)