Example #1
0
def numerical_check(_net, _cfg: Configuration, train_data, test_data, dump_result=False):  # pragma: no cover
    """Run a quick numerical sanity check of the MXNet training loop.

    Initializes ``_net``, trains it on ``train_data`` for the epoch range
    configured in ``_cfg``, and prints an evaluation summary on
    ``test_data`` after every epoch.

    Parameters
    ----------
    _net:
        The (uninitialized) network to check.
    _cfg: Configuration
        Supplies ctx, batch_size, loss/optimizer parameters, the epoch
        range and the model directory/name.
    train_data, test_data:
        Batched iterables of training / evaluation data.
    dump_result: bool
        When True, additionally log evaluation output to
        ``<model_dir>/result.log`` and ``_cfg.validation_result_file``.
    """
    ctx = _cfg.ctx
    batch_size = _cfg.batch_size

    _net.initialize(ctx=ctx)

    # bp_loss_f holds the single loss used for back propagation
    bp_loss_f = get_bp_loss(**_cfg.loss_params)
    loss_function = {}
    loss_function.update(bp_loss_f)

    from longling.ML.MxnetHelper.glue import module
    from longling.ML.toolkit import EvalFormatter as Formatter
    from longling.ML.toolkit import MovingLoss
    from tqdm import tqdm

    loss_monitor = MovingLoss(loss_function)
    progress_monitor = tqdm
    if dump_result:
        from longling import config_logging
        validation_logger = config_logging(
            filename=path_append(_cfg.model_dir, "result.log"),
            logger="%s-validation" % _cfg.model_name,
            mode="w",
            log_format="%(message)s",
        )
        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=_cfg.validation_result_file,
        )
    else:
        evaluation_formatter = Formatter()

    # train check
    trainer = module.Module.get_trainer(
        _net, optimizer=_cfg.optimizer,
        optimizer_params=_cfg.optimizer_params,
        select=_cfg.train_select
    )

    for epoch in range(_cfg.begin_epoch, _cfg.end_epoch):
        for batch_data in progress_monitor(train_data, "Epoch: %s" % epoch):
            fit_f(
                net=_net, batch_size=batch_size, batch_data=batch_data,
                trainer=trainer, bp_loss_f=bp_loss_f,
                loss_function=loss_function,
                loss_monitor=loss_monitor,
                ctx=ctx,
            )

        # Evaluate after every epoch.  The original wrapped this in a
        # doubly-nested ``if epoch % 1 == 0`` guard, which is always
        # true (and was duplicated); both guards have been removed.
        print(
            evaluation_formatter(
                epoch=epoch,
                loss_name_value=dict(loss_monitor.items()),
                eval_name_value=eval_f(_net, test_data, ctx=ctx),
                extra_info=None,
                dump=True,
            )[0]
        )
Example #2
0
    def toolbox_init(
        self,
        evaluation_formatter_parameters=None,
        validation_logger_mode="w",
        silent=False,
    ):
        """Populate ``self.toolbox`` with training-time helpers.

        Builds a moving-average loss monitor, a console progress monitor,
        a wall-clock timer and an evaluation formatter that logs to
        ``<model_dir>/result.log``.

        Parameters
        ----------
        evaluation_formatter_parameters: dict or None
            Extra keyword arguments forwarded to the evaluation formatter.
        validation_logger_mode: str
            File mode used to open the validation log.
        silent: bool
            If True, mute the progress monitor's console output.
        """
        from longling import path_append
        from longling.lib.clock import Clock
        from longling.lib.utilog import config_logging
        from longling.ML.toolkit import EvalFormatter as Formatter
        from longling.ML.toolkit import MovingLoss, ConsoleProgressMonitor as ProgressMonitor

        # start from an empty toolbox skeleton, filled in below
        self.toolbox = {
            "monitor": dict(),
            "timer": None,
            "formatter": dict(),
        }

        mod = self.mod
        cfg = self.mod.cfg

        # exactly one back-propagation loss must have been configured
        assert self.loss_function is not None

        monitor_of_loss = MovingLoss(self.loss_function)

        stopwatch = Clock()

        monitor_of_progress = ProgressMonitor(
            indexes={"Loss": list(self.loss_function)},
            values={"Loss": monitor_of_loss.losses},
            end_epoch=cfg.end_epoch - 1,
            silent=silent,
        )

        logger_for_validation = config_logging(
            filename=path_append(cfg.model_dir, "result.log"),
            logger="%s-validation" % cfg.model_name,
            mode=validation_logger_mode,
            log_format="%(message)s",
        )

        # normalize the optional formatter kwargs
        if evaluation_formatter_parameters is None:
            extra_formatter_kwargs = {}
        else:
            extra_formatter_kwargs = evaluation_formatter_parameters

        formatter_of_evaluation = Formatter(
            logger=logger_for_validation,
            dump_file=mod.cfg.validation_result_file,
            **extra_formatter_kwargs,
        )

        self.toolbox["monitor"]["loss"] = monitor_of_loss
        self.toolbox["monitor"]["progress"] = monitor_of_progress
        self.toolbox["timer"] = stopwatch
        self.toolbox["formatter"]["evaluation"] = formatter_of_evaluation
Example #3
0
    def test(cls, test_filename, test_epoch, dump_file=None, **kwargs):
        """Evaluate the model saved at ``test_epoch`` on ``test_filename``.

        Parameters
        ----------
        test_filename: str
            Path of the test data to run through the module's ETL.
        test_epoch: int
            Epoch whose saved parameters should be loaded.
        dump_file: str or None
            Optional file the formatter dumps the results to.
        kwargs:
            Forwarded to ``cls.load``.

        Returns
        -------
        The evaluation result produced by ``module.mod.eval``.
        """
        from longling.ML.toolkit.formatter import EpochEvalFMT as Formatter

        result_formatter = Formatter(dump_file=dump_file)
        loaded = cls.load(test_epoch, **kwargs)

        dataset = loaded.etl(test_filename)
        result = loaded.mod.eval(loaded.net, dataset)
        result_formatter(tips="test", eval_name_value=result)
        return result
Example #4
0
File: run.py — Project: tswsxk/XKT
def numerical_check(_net,
                    _cfg: Configuration,
                    train_data,
                    test_data,
                    dump_result=False,
                    reporthook=None,
                    final_reporthook=None):  # pragma: no cover
    """Numerically sanity-check the full MXNet training loop.

    Initializes ``_net``, trains it for ``[_cfg.begin_epoch, _cfg.end_epoch)``
    epochs on ``train_data`` and evaluates on ``test_data`` after each epoch.

    Parameters
    ----------
    _net:
        The network to initialize and train.
    _cfg: Configuration
        Supplies ctx, batch_size, loss/optimizer/lr parameters, the
        epoch range, model paths and a logger.
    train_data, test_data:
        Batched iterables.  ``train_data`` must be non-empty: the loop
        index ``i`` is read after the inner loop, so an empty iterable
        raises ``NameError``.
    dump_result: bool
        When True, dump evaluation results via the formatter.
    reporthook:
        Optional callable invoked with the evaluation data each epoch.
    final_reporthook:
        Optional callable invoked once after all epochs complete.
    """
    ctx = _cfg.ctx
    batch_size = _cfg.batch_size

    _net.initialize(ctx=ctx)

    # bp_loss_f holds the single loss used for back propagation
    bp_loss_f = get_bp_loss(**_cfg.loss_params)
    loss_function = {}
    loss_function.update(bp_loss_f)

    from longling.ML.MxnetHelper.glue import module
    from longling.ML.toolkit import EpochEvalFMT as Formatter
    from longling.ML.toolkit import MovingLoss
    from tqdm import tqdm

    loss_monitor = MovingLoss(loss_function)
    progress_monitor = tqdm
    if dump_result:
        from longling import config_logging
        # dedicated logger so evaluation lines land in result.log verbatim
        validation_logger = config_logging(
            filename=path_append(_cfg.model_dir, "result.log"),
            logger="%s-validation" % _cfg.model_name,
            mode="w",
            log_format="%(message)s",
        )
        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=_cfg.validation_result_file,
        )
    else:
        evaluation_formatter = Formatter()

    # train check
    trainer = module.Module.get_trainer(_net,
                                        optimizer=_cfg.optimizer,
                                        optimizer_params=_cfg.optimizer_params,
                                        select=_cfg.train_select)

    for epoch in range(_cfg.begin_epoch, _cfg.end_epoch):
        for i, batch_data in enumerate(
                progress_monitor(train_data, "Epoch: %s" % epoch)):
            fit_f(
                net=_net,
                batch_size=batch_size,
                batch_data=batch_data,
                trainer=trainer,
                bp_loss_f=bp_loss_f,
                loss_function=loss_function,
                loss_monitor=loss_monitor,
                ctx=ctx,
            )
        # NOTE: ``pop`` removes "update_params" from _cfg.lr_params, so
        # this trainer rebuild (attaching the lr scheduler) runs at most
        # once — after the first epoch.
        if _cfg.lr_params and "update_params" in _cfg.lr_params:
            _cfg.logger.info("reset trainer")
            lr_params = _cfg.lr_params.pop("update_params")
            # i + 1 == number of batches seen in the epoch just finished
            lr_update_params = dict(batches_per_epoch=i + 1,
                                    lr=_cfg.optimizer_params["learning_rate"],
                                    update_epoch=lr_params.get(
                                        "update_epoch",
                                        _cfg.end_epoch - _cfg.begin_epoch - 1))
            lr_update_params.update(lr_params)

            trainer = module.Module.get_trainer(
                _net,
                optimizer=_cfg.optimizer,
                optimizer_params=_cfg.optimizer_params,
                lr_params=lr_update_params,
                select=_cfg.train_select,
                logger=_cfg.logger)

        # ``epoch % 1 == 0`` is always true — evaluate every epoch
        if epoch % 1 == 0:
            msg, data = evaluation_formatter(iteration=epoch,
                                             loss_name_value=dict(
                                                 loss_monitor.items()),
                                             eval_name_value=eval_f(_net,
                                                                    test_data,
                                                                    ctx=ctx),
                                             extra_info=None,
                                             dump=dump_result,
                                             keep={"msg", "data"})
            print(msg)
            if reporthook is not None:
                reporthook(data)

    if final_reporthook is not None:
        final_reporthook()
Example #5
0
    def toolbox_init(
            self,
            evaluation_formatter_parameters=None,
            validation_logger_mode="w",
            silent=False,
    ):
        """Initialize ``self.toolbox`` with the helpers used during training.

        Creates a moving-average loss monitor, an epoch-style console
        progress monitor, a timer, and an epoch evaluation formatter
        writing to ``<model_dir>/result.log``.

        Parameters
        ----------
        evaluation_formatter_parameters: dict or None
            Extra keyword arguments for the evaluation formatter.
        validation_logger_mode: str
            Mode used to open the validation log file.
        silent: bool
            If True, suppress progress output.
        """
        from longling import path_append
        from longling.lib.clock import Clock
        from longling.lib.utilog import config_logging
        from longling.ML.toolkit import EpochEvalFMT as Formatter
        from longling.ML.toolkit import MovingLoss, ConsoleProgressMonitor as ProgressMonitor

        # empty skeleton; entries are filled in at the end
        self.toolbox = {
            "monitor": dict(),
            "timer": None,
            "formatter": dict(),
        }

        mod = self.mod
        cfg = self.mod.cfg

        # a back-propagation loss must already be configured
        assert self.loss_function is not None

        loss_tracker = MovingLoss(self.loss_function)

        clock = Clock()

        loss_names = [name for name in self.loss_function]
        progress = ProgressMonitor(
            indexes={"Loss": loss_names},
            values={"Loss": loss_tracker.losses},
            silent=silent,
            player_type="epoch",
            total_epoch=cfg.end_epoch - 1,
        )

        result_logger = config_logging(
            filename=path_append(cfg.model_dir, "result.log"),
            logger="%s-validation" % cfg.model_name,
            mode=validation_logger_mode,
            log_format="%(message)s",
        )

        # set evaluation formatter
        if evaluation_formatter_parameters is None:
            extra_formatter_params = {}
        else:
            extra_formatter_params = evaluation_formatter_parameters

        eval_formatter = Formatter(
            logger=result_logger,
            dump_file=mod.cfg.validation_result_file,
            **extra_formatter_params,
        )

        self.toolbox["monitor"]["loss"] = loss_tracker
        self.toolbox["monitor"]["progress"] = progress
        self.toolbox["timer"] = clock
        self.toolbox["formatter"]["evaluation"] = eval_formatter
Example #6
0
    def epoch_loop(self,
                   net,
                   begin_epoch,
                   end_epoch,
                   batch_size,
                   train_data,
                   trainer,
                   loss_function,
                   test_data=None,
                   ctx=mx.cpu(),
                   toolbox=None,
                   save_model=True,
                   eval_every_n_epoch=1,
                   **kwargs):
        """
        Wrap the batch-level training procedure into an epoch-level loop:
        train each epoch, optionally rebuild the trainer for lr scheduling,
        evaluate, and save the model parameters.

        Parameters
        ----------
        net: HybridBlock
            The network which has been initialized or loaded from
            the existed model
        begin_epoch: int
            The begin epoch of this train procession
        end_epoch: int
            The end epoch of this train procession
        batch_size: int
            The size of each batch
        train_data: Iterable
            The data used for this train procession,
            NOTICE: should have been divided to batches
        trainer:
            The trainer used to update the parameters of the net
        loss_function: dict of function
            The functions to compute the loss for the procession
            of back propagation
        test_data: Iterable
            The data used for the evaluation at the end of each epoch,
            NOTICE: should have been divided to batches
            Default to ``None``
        ctx: Context or list of Context
            Defaults to ``mx.cpu()``.
        toolbox: Toolbox
            Default to ``None``
        save_model: bool
            Whether save model
        eval_every_n_epoch: int
        kwargs
        """
        # When changing the parameters here, synchronously update the
        # parameters of the ``fit`` function.
        # Definition of the epoch-level training procedure.
        if toolbox is not None:
            formatter = toolbox.get('formatter')
        else:
            formatter = None

        for epoch in range(begin_epoch, end_epoch):
            batch_num, loss_values = self.batch_loop(
                net=net,
                epoch=epoch,
                batch_size=batch_size,
                train_data=train_data,
                trainer=trainer,
                loss_function=loss_function,
                ctx=ctx,
                toolbox=toolbox,
            )
            # NOTE: ``pop`` removes "update_params" from cfg.lr_params,
            # so the trainer is rebuilt (attaching the lr scheduler) at
            # most once, after the first epoch.
            if hasattr(self.cfg, "lr_params") and self.cfg.lr_params \
                    and "update_params" in self.cfg.lr_params and self.cfg.end_epoch - self.cfg.begin_epoch - 1 > 0:
                self.cfg.logger.info("reset trainer")
                lr_params = self.cfg.lr_params.pop("update_params")
                lr_update_params = dict(
                    batches_per_epoch=batch_num,
                    lr=self.cfg.optimizer_params["learning_rate"],
                    update_epoch=lr_params.get(
                        "update_epoch",
                        self.cfg.end_epoch - self.cfg.begin_epoch - 1))
                lr_update_params.update(lr_params)

                trainer = module.Module.get_trainer(
                    net,
                    optimizer=self.cfg.optimizer,
                    optimizer_params=self.cfg.optimizer_params,
                    lr_params=lr_update_params,
                    select=self.cfg.train_select,
                    logger=self.cfg.logger)

            # progress monitor (if present) exposes the epoch's wall time
            try:
                train_time = toolbox["monitor"]["progress"].iteration_time
            except (KeyError, TypeError):
                train_time = None

            # NOTE(review): because of the ``epoch - 1`` offset this fires
            # at epochs 1, 1 + n, ... (plus the final epoch); with the
            # default n=1 it is every epoch — confirm the offset is intended.
            if (epoch - 1) % eval_every_n_epoch == 0 or epoch == end_epoch - 1:
                # todo: define the per-epoch model evaluation method
                evaluation_result = self.eval(net, test_data, ctx=ctx)

                # Formatter is assumed to be importable at the enclosing
                # module's scope — TODO confirm
                evaluation_formatter = formatter.get(
                    'evaluation', Formatter()) if formatter else Formatter()

                print(
                    evaluation_formatter(
                        iteration=epoch,
                        train_time=train_time,
                        loss_name_value=loss_values,
                        eval_name_value=evaluation_result,
                        extra_info=None,
                        dump=True,
                        keep="msg",
                    ))

            # todo: define the model-saving scheme
            if save_model:
                if epoch % kwargs.get('save_epoch', 1) == 0:
                    self.save_params(self.epoch_params_filepath(epoch), net)
Example #7
0
def train(net,
          cfg,
          loss_function,
          trainer,
          train_data,
          test_data=None,
          params_save=False,
          dump_result=False,
          progress_monitor=None,
          *,
          fit_f,
          eval_f=None,
          net_init=None,
          get_net=None,
          get_loss=None,
          get_trainer=None,
          save_params=None,
          enable_hyper_search=False,
          reporthook=None,
          final_reporthook=None,
          primary_key=None,
          eval_epoch=1,
          loss_dict2tmt_loss=None,
          epoch_lr_scheduler=None,
          batch_lr_scheduler=None,
          loss_as_dict=False,
          verbose=None,
          dump_cfg=None,
          **cfg_kwargs):
    """Generic epoch-based training driver.

    Optionally rewires its own configuration for hyper-parameter search,
    builds/initializes the network, normalizes the loss specification,
    then runs ``fit_f`` per batch and ``eval_f`` every ``eval_epoch``
    epochs, with optional lr scheduling, parameter saving and report
    hooks.

    Parameters (selected)
    ---------------------
    net / get_net:
        Either a ready network, or a factory built from
        ``cfg.hyper_params`` (``get_net`` wins when provided).
    loss_function / get_loss:
        A loss (single callable or dict of named losses); ``get_loss``
        built from ``cfg.loss_params`` wins when provided.
    fit_f:
        Required; performs one batch of training.
    eval_f:
        Evaluates ``net`` on ``test_data``; only called when
        ``test_data`` is not None.
    params_save: bool
        Save parameters every ``cfg.save_epoch`` epochs (requires
        ``save_params``).
    train_data:
        Must be non-empty: the loop index ``i`` is read after the batch
        loop, so an empty iterable raises ``NameError`` when
        ``cfg.lr_params`` triggers a trainer reset.
    """
    if enable_hyper_search:
        assert get_net is not None
        # prepare_hyper_search rewrites cfg_kwargs and the report hooks
        # for the search framework; ``tag`` controls result dumping
        cfg_kwargs, reporthook, final_reporthook, tag = prepare_hyper_search(
            cfg_kwargs,
            reporthook,
            final_reporthook,
            primary_key=primary_key,
            with_keys="Epoch",
            dump=params_save)
        dump_result = tag
        verbose = tag if verbose is None else verbose
        cfg.update(**cfg_kwargs)
        print("hyper search enabled")
        print(cfg)

    verbose = True if verbose is None else verbose
    dump_cfg = dump_cfg if dump_cfg is not None else params_save
    if dump_cfg:
        cfg.dump()

    net = net if get_net is None else get_net(**cfg.hyper_params)

    if net_init is not None:
        net_init(net, cfg=cfg, initializer_kwargs=cfg.init_params)

    # train/eval contexts fall back to the shared cfg.ctx
    train_ctx = cfg.ctx if cfg.train_ctx is None else cfg.train_ctx
    eval_ctx = cfg.ctx if cfg.eval_ctx is None else cfg.eval_ctx
    batch_size = cfg.batch_size

    loss_function = get_loss(
        **cfg.loss_params) if get_loss is not None else loss_function

    # Normalize the loss spec: a dict is used as-is; a bare callable is
    # wrapped into {name: fn}. ``_loss_function`` is what fit_f receives
    # (the single callable, unless loss_as_dict is True).
    if isinstance(loss_function, dict):
        _loss_function = loss_function
    else:
        if hasattr(loss_function, "__name__"):
            loss_name = loss_function.__name__
        elif hasattr(loss_function, "__class__"):
            loss_name = loss_function.__class__.__name__
        else:  # pragma: no cover
            loss_name = "loss"
        loss_function = {loss_name: loss_function}
        if loss_dict2tmt_loss is not None:
            loss_function = loss_dict2tmt_loss(loss_function)
        _loss_function = list(loss_function.values()
                              )[0] if loss_as_dict is False else loss_function

    loss_monitor = MovingLoss(loss_function)

    # Default progress display: console monitor when tmt losses are
    # available, otherwise a tqdm wrapper keyed on the epoch.
    if progress_monitor is None and loss_dict2tmt_loss is not None:
        progress_monitor = ConsoleProgressMonitor(
            indexes={"Loss": [name for name in loss_function]},
            values={"Loss": loss_monitor.losses},
            player_type="epoch",
            total_epoch=cfg.end_epoch - 1,
            silent=not verbose)
    elif progress_monitor is None or progress_monitor == "tqdm":

        def progress_monitor(x, e):
            return tqdm(x, "Epoch: %s" % e, disable=not verbose)

    if dump_result:
        from longling import config_logging
        validation_logger = config_logging(
            filename=path_append(cfg.model_dir,
                                 cfg.get("result_log", RESULT_LOG)),
            logger="%s-validation" % cfg.model_name,
            mode="w",
            log_format="%(message)s",
        )
        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=cfg.validation_result_file,
        )
    else:
        evaluation_formatter = Formatter()

    # train check
    if get_trainer is not None:
        trainer = get_trainer(net,
                              optimizer=cfg.optimizer,
                              optimizer_params=cfg.optimizer_params,
                              select=cfg.train_select,
                              lr_params=cfg.lr_params)
        # when a scheduler flag is literally True, get_trainer returned a
        # (trainer, scheduler) pair — unpack it
        if batch_lr_scheduler is True:
            trainer, batch_lr_scheduler = trainer
        elif epoch_lr_scheduler is True:
            trainer, epoch_lr_scheduler = trainer

    for epoch in range(cfg.begin_epoch, cfg.end_epoch):
        for i, batch_data in enumerate(progress_monitor(train_data, epoch)):
            fit_f(
                net,
                batch_size=batch_size,
                batch_data=batch_data,
                trainer=trainer,
                loss_function=_loss_function,
                loss_monitor=loss_monitor,
                ctx=train_ctx,
            )
            if batch_lr_scheduler is not None:
                batch_lr_scheduler.step()

        # NOTE: ``pop`` removes "update_params" from cfg.lr_params, so
        # this trainer rebuild happens at most once (after epoch one).
        if cfg.lr_params and "update_params" in cfg.lr_params and cfg.end_epoch - cfg.begin_epoch - 1 > 0:
            cfg.logger.info("reset trainer")
            lr_params = cfg.lr_params.pop("update_params")
            # i + 1 == number of batches in the epoch just finished
            lr_update_params = dict(batches_per_epoch=i + 1,
                                    lr=cfg.optimizer_params["learning_rate"],
                                    update_epoch=lr_params.get(
                                        "update_epoch",
                                        cfg.end_epoch - cfg.begin_epoch - 1))
            lr_update_params.update(lr_params)

            assert get_trainer is not None
            trainer = get_trainer(net,
                                  optimizer=cfg.optimizer,
                                  optimizer_params=cfg.optimizer_params,
                                  lr_params=lr_update_params,
                                  select=cfg.train_select,
                                  logger=cfg.logger)

        if test_data is not None and epoch % eval_epoch == 0:
            msg, data = evaluation_formatter(
                iteration=epoch,
                loss_name_value=dict(loss_monitor.items()),
                eval_name_value=eval_f(net,
                                       test_data,
                                       ctx=eval_ctx,
                                       verbose=verbose,
                                       **cfg.get("eval_params", {})),
                extra_info=None,
                dump=dump_result,
                keep={"msg", "data"})
            print(msg)
            if reporthook is not None:
                reporthook(data)

        # optional
        loss_monitor.reset()

        # save on the configured cadence and always at the final epoch
        if params_save and (epoch % cfg.save_epoch == 0
                            or epoch == cfg.end_epoch - 1):
            assert save_params is not None
            params_path = get_epoch_params_filepath(cfg.model_name, epoch,
                                                    cfg.model_dir)
            cfg.logger.info("save model params to %s, with select='%s'" %
                            (params_path, cfg.save_select))
            save_params(params_path, net, select=cfg.save_select)

        if epoch_lr_scheduler is not None:
            epoch_lr_scheduler.step()

    if final_reporthook is not None:
        final_reporthook()
Example #8
0
def numerical_check(_net, _cfg: Configuration, train_data, test_data, dump_result=False,
                    reporthook=None, final_reporthook=None):  # pragma: no cover
    """Numerically sanity-check the PyTorch training loop.

    Moves ``_net`` to the configured device, trains it on ``train_data``
    for the configured epoch range and evaluates on ``test_data`` after
    every epoch.

    Parameters
    ----------
    _net:
        The network to check; replaced by its device-placed version.
    _cfg: Configuration
        Supplies ctx, loss/optimizer parameters, the epoch range and
        the model directory/name.
    train_data, test_data:
        Batched iterables of training / evaluation data.
    dump_result: bool
        When True, also log evaluation output to
        ``<model_dir>/result.log`` and ``_cfg.validation_result_file``.
    reporthook:
        Optional callable invoked with the evaluation data each epoch.
    final_reporthook:
        Optional callable invoked once after training finishes.
    """
    ctx = _cfg.ctx

    _net = set_device(_net, ctx)

    # bp_loss_f holds the single loss used for back propagation
    bp_loss_f = get_bp_loss(ctx, **_cfg.loss_params)
    loss_function = {}
    loss_function.update(bp_loss_f)

    from longling.ML.toolkit import EpochEvalFMT as Formatter
    from longling.ML.toolkit import MovingLoss
    from tqdm import tqdm

    loss_monitor = MovingLoss(loss_function)
    progress_monitor = tqdm
    if dump_result:
        from longling import config_logging
        validation_logger = config_logging(
            filename=path_append(_cfg.model_dir, "result.log"),
            logger="%s-validation" % _cfg.model_name,
            mode="w",
            log_format="%(message)s",
        )
        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=_cfg.validation_result_file,
        )
    else:
        evaluation_formatter = Formatter()

    # train check
    from longling.ML.PytorchHelper.toolkit.optimizer import get_trainer
    trainer = get_trainer(
        _net, optimizer=_cfg.optimizer,
        optimizer_params=_cfg.optimizer_params,
        select=_cfg.train_select
    )

    for epoch in range(_cfg.begin_epoch, _cfg.end_epoch):
        for batch_data in progress_monitor(train_data, "Epoch: %s" % epoch):
            fit_f(
                net=_net, batch_data=batch_data,
                trainer=trainer, bp_loss_f=bp_loss_f,
                loss_function=loss_function,
                loss_monitor=loss_monitor,
            )

        # Evaluate after every epoch.  The original guarded this with
        # ``if epoch % 1 == 0``, which is always true; the no-op guard
        # has been removed.
        msg, data = evaluation_formatter(
            epoch=epoch,
            loss_name_value=dict(loss_monitor.items()),
            eval_name_value=eval_f(_net, test_data, ctx=ctx),
            extra_info=None,
            dump=dump_result,
        )
        print(msg)
        if reporthook is not None:
            reporthook(data)

        # optional, whether reset the loss at the end of each epoch
        loss_monitor.reset()

    if final_reporthook is not None:
        final_reporthook()
Example #9
0
def get_default_toolbox(loss_function=None,
                        evaluation_formatter_parameters=None,
                        progress_monitor_parameters=None,
                        validation_logger_mode="w",
                        silent=False,
                        configuration=None):  # pragma: no cover
    """Build the default training toolbox (monitors, timer, formatter).

    New in version 1.3.16

    todo: consider whether to keep it

    Notice
    ------
    The developer who modifies this function should simultaneously
    modify the related function in glue.
    """
    from longling import path_append
    from longling.lib.clock import Clock
    from longling.lib.utilog import config_logging
    from longling.ML.toolkit import EpochEvalFMT as Formatter
    from longling.ML.toolkit import MovingLoss, ConsoleProgressMonitor as ProgressMonitor

    cfg = configuration

    # the loss monitor only exists when a loss function was supplied
    if loss_function:
        monitor_of_loss = MovingLoss(loss_function)
        loss_indexes = {"Loss": [name for name in loss_function]}
    else:
        monitor_of_loss = None
        loss_indexes = {}
    loss_values = {"Loss": monitor_of_loss.losses} if monitor_of_loss else {}

    timer = Clock()

    extra_pm_params = progress_monitor_parameters
    if extra_pm_params is None:
        extra_pm_params = {}
    monitor_of_progress = ProgressMonitor(
        indexes=loss_indexes,
        values=loss_values,
        silent=silent,
        **extra_pm_params,
    )

    # fall back to generic names/paths when cfg lacks the attributes
    if hasattr(cfg, "model_name"):
        validation_logger_name = "%s-validation" % cfg.model_name
    else:
        validation_logger_name = "model"
    if hasattr(cfg, "model_dir"):
        validation_log_path = path_append(cfg.model_dir, "result.log")
    else:
        validation_log_path = None

    validation_logger = config_logging(
        filename=validation_log_path,
        logger=validation_logger_name,
        mode=validation_logger_mode,
        log_format="%(message)s",
    )

    # set evaluation formatter
    if evaluation_formatter_parameters is None:
        extra_formatter_params = {}
    else:
        extra_formatter_params = evaluation_formatter_parameters

    formatter_of_evaluation = Formatter(
        logger=validation_logger,
        dump_file=getattr(cfg, "validation_result_file", False),
        **extra_formatter_params,
    )

    return {
        "monitor": {
            "loss": monitor_of_loss,
            "progress": monitor_of_progress,
        },
        "timer": timer,
        "formatter": {
            "evaluation": formatter_of_evaluation,
        },
    }