Exemple #1
0
def k_fold_experiment(hparams: Hparams):
    """
    Run k-fold training and archive every fold's artifacts.

    For each ``(train, dev, info)`` triple yielded by
    ``load_dataset(hparams, ret_test=False)`` a new model is built, compiled
    and fit inside a ``tf.distribute.MirroredStrategy`` scope. After each fold
    the checkpoint, report and saved-model directories are moved into
    ``<workspace>/k_fold/<fold index>``. Once all folds are done, and only if
    ``hparams.training.do_eval`` is set, ``evaluation`` is called with the
    list of per-fold saved-model paths.

    :param hparams: experiment configuration; the fields read here are
        ``use_mixed_float16``, ``gpus``, ``eval_use_best`` and ``training``
        (``callbacks``, ``do_eval``, ``validation_steps``, ``max_epochs``,
        ``steps_per_epoch``), plus its workspace/report/model path getters
    :return: None
    """
    logger = logging.getLogger(__name__)
    if hparams.use_mixed_float16:
        logger.info("Use auto mixed policy")
        # Mixed precision is requested through this environment variable.
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

    strategy = tf.distribute.MirroredStrategy(
        devices=[f"/gpu:{gpu_id}" for gpu_id in hparams.gpus])

    # Final location of every fold's saved model, in fold order.
    model_saved_dirs = []

    folds = load_dataset(hparams, ret_test=False)
    # The third element of each fold (dataset info) is not needed here.
    for idx, (train_dataset, dev_dataset, _) in enumerate(folds):
        logger.info(f"Start {idx}th-fold training")
        with strategy.scope():
            # build model
            model, (losses,
                    loss_weights), metrics, optimizer = build_model(hparams)
            # build callbacks
            # NOTE(review): `experiment` in this file passes the whole
            # `hparams` to build_callbacks, while this call passes
            # `hparams.training.callbacks` -- confirm which one
            # build_callbacks actually expects.
            callbacks = build_callbacks(hparams.training.callbacks)
            # compile
            model.compile(optimizer=optimizer,
                          loss=losses,
                          metrics=metrics,
                          loss_weights=loss_weights)
            # fit, validating on the dev split only when requested
            if hparams.training.do_eval:
                validation_data = dev_dataset
                validation_steps = hparams.training.validation_steps
            else:
                logger.info("Do not evaluate.")
                validation_data = None
                validation_steps = None

            model.fit(
                train_dataset,
                validation_data=validation_data,
                epochs=hparams.training.max_epochs,
                callbacks=callbacks,
                steps_per_epoch=hparams.training.steps_per_epoch,
                validation_steps=validation_steps,
            )

            # build archive dir; exist_ok avoids the check-then-create race
            k_fold_dir = os.path.join(hparams.get_workspace_dir(), "k_fold",
                                      str(idx))
            os.makedirs(k_fold_dir, exist_ok=True)

            # load best model
            checkpoint_dir = os.path.join(hparams.get_workspace_dir(),
                                          "checkpoint")
            if hparams.eval_use_best and os.path.exists(checkpoint_dir):
                logger.info(f"Load best model from {checkpoint_dir}")
                average_checkpoints(model, checkpoint_dir)
                # Archive this fold's checkpoints so that the shared
                # checkpoint path no longer exists when the next fold starts.
                logger.info(f"Move {checkpoint_dir} to {k_fold_dir}")
                shutil.move(checkpoint_dir, k_fold_dir)

            # save best model
            logger.info(
                f'Save {idx}th model in {hparams.get_model_filename()}')
            model.save_weights(hparams.get_model_filename(), save_format="tf")

        # eval on test dataset and make reports
        # NOTE(review): unlike the final evaluation this call is not gated on
        # `hparams.training.do_eval`; presumably because the moves below need
        # the report directory it produces -- confirm.
        evaluation(hparams)
        logger.info(f"Move {hparams.get_report_dir()} to {k_fold_dir}")
        shutil.move(hparams.get_report_dir(), k_fold_dir)
        logger.info(f"Move {hparams.get_saved_model_dir()} to {k_fold_dir}")
        # shutil.move returns the destination path, which is recorded below.
        cur_model_saved_dir = shutil.move(hparams.get_saved_model_dir(),
                                          k_fold_dir)
        logger.info(
            f"New model saved path for {idx}th fold: {cur_model_saved_dir}")
        model_saved_dirs.append(cur_model_saved_dir)

        logger.info(f'{idx}th-fold experiment Finish!')

    # The per-fold saved models are handed to evaluation as a list of
    # checkpoints; they are not averaged into a single model here.
    checkpoints = [f'{itm}/model' for itm in model_saved_dirs]
    if hparams.training.do_eval:
        evaluation(hparams, checkpoints=checkpoints)

    logger.info('Experiment Finish!')
Exemple #2
0
def build_model(hparam: Hparams,
                return_losses=True,
                return_metrics=True,
                return_optimizer=True,
                stage=TRAIN_STAGE):
    """Build custom keras model, losses, metrics, and optimizer

    The model class is looked up with ``BaseModel.by_name(hparam.model_name)``,
    instantiated with ``hparam`` and called once on the inputs built from
    ``hparam.dataset``. When ``stage == TRAIN_STAGE`` weights may then be
    loaded from a pretrained model, from ``hparam.model_load_path`` or from
    the workspace (resume), depending on which of those settings are present.

    :param hparam: configuration; the fields read here are ``model_name``,
        ``dataset``, ``training``, ``pretrained``, ``model_resume_path`` and
        ``model_load_path``
    :param return_losses: when true, ``(losses, loss_weights)`` is appended
        to the result
    :param return_metrics: when true, the metrics are appended to the result
    :param return_optimizer: when true, the optimizer is appended to the
        result
    :param stage: weights are only loaded when this equals ``TRAIN_STAGE``
    :return: a tuple starting with the model, followed by the requested
        items in the order losses, metrics, optimizer
    :raises Exception: any error raised while loading pretrained weights is
        logged and re-raised unchanged
    """
    logger.info(f"Try to build model {hparam.model_name}")
    # `models` is not referenced below; presumably it is imported for its
    # side effects (registering the classes by_name looks up) -- TODO confirm.
    from aispace import models
    from aispace.models.base_model import BaseModel
    model = BaseModel.by_name(hparam.model_name)(hparam)
    # build inputs and run the model once on them
    inputs = build_tf_model_inputs(hparam.dataset)
    model(inputs, training=True)

    rets = ()
    # build losses
    if return_losses:
        losses, loss_weights = build_tf_model_losses(model, hparam.dataset)
        rets += ((losses, loss_weights), )
    # build metrics
    if return_metrics:
        metrics = build_tf_model_metrics(hparam.dataset)
        rets += (metrics, )
    # build optimizer
    if return_optimizer:
        optimizer = build_tf_model_optimizer(hparam.training)
        rets += (optimizer, )
    # The summary is printed for every stage, not only for training.
    model.summary()

    # NOTE(review): the three branches below mix truthiness checks
    # (`not hparam.model_resume_path`) with `is not None` checks, so an empty
    # (falsy but not None) path can satisfy more than one branch. Behavior is
    # left unchanged here -- confirm whether such values can occur.

    # init from pretrained model (language or etc.,)
    if (stage == TRAIN_STAGE and not hparam.model_resume_path
            and not hparam.model_load_path and "pretrained" in hparam
            and hparam.pretrained.init_from_pretrained):
        try:
            logger.info(f"Load weights from {hparam.pretrained.model_path}")
            if hparam.pretrained.model_path.endswith(".h5"):
                model.load_weights(hparam.pretrained.model_path, by_name=True)
            else:
                logger.info(
                    f"Load weights using model adapter {hparam.pretrained.adapter}"
                )
                adapter = build_model_adapter(hparam.pretrained)
                if adapter is not None:
                    adapter(model.trainable_variables,
                            hparam.pretrained.model_path)
        except Exception:
            # Log through the module logger (with traceback) like every other
            # message in this function, then propagate the original error.
            logger.exception("Load weights failure!")
            raise

    # initialize model from the weights saved under model_load_path
    if stage == TRAIN_STAGE and not hparam.model_resume_path and hparam.model_load_path is not None:
        model_saved = os.path.join(hparam.model_load_path, "model_saved",
                                   "model")
        logger.info(f"Initialize model from {model_saved}")
        model.load_weights(model_saved)

    # resume model; the weights are read from the workspace dir, not from
    # model_resume_path itself
    if stage == TRAIN_STAGE and hparam.model_resume_path is not None:
        model_saved = os.path.join(hparam.get_workspace_dir(), "model_saved",
                                   "model")
        logger.info(f"Resume model from {model_saved}")
        model.load_weights(model_saved)

    return (model, ) + rets
Exemple #3
0
def experiment(hparams: Hparams):
    """Train one model, then either plot the lr-finder result or save and evaluate.

    The first ``(train, dev, info)`` triple from
    ``load_dataset(hparams, ret_test=False)`` is used for training inside a
    ``tf.distribute.MirroredStrategy`` scope. If any callback exposes
    ``lr_finder_plot``, the first such callback writes its plot and nothing
    else happens. Otherwise the checkpoints are optionally passed to
    ``average_checkpoints``, the weights are saved, and ``evaluation`` runs
    when ``hparams.training.do_eval`` is set.

    :param hparams: experiment configuration
    :return: None
    """
    logger = logging.getLogger(__name__)
    if hparams.use_mixed_float16:
        logger.info("Use auto mixed policy")
        # Mixed precision is requested through this environment variable.
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

    gpu_devices = [f"/gpu:{gpu}" for gpu in hparams.gpus]
    strategy = tf.distribute.MirroredStrategy(devices=gpu_devices)

    # Only the first split produced by the loader is used; its info element
    # is not needed here.
    first_split = next(load_dataset(hparams, ret_test=False))
    train_dataset, dev_dataset, _ = first_split

    with strategy.scope():
        model, (losses, loss_weights), metrics, optimizer = build_model(
            hparams)
        callbacks = build_callbacks(hparams)
        model.compile(
            optimizer=optimizer,
            loss=losses,
            metrics=metrics,
            loss_weights=loss_weights,
        )

        # Validate on the dev split only when evaluation is requested.
        if not hparams.training.do_eval:
            logger.info("Do not evaluate.")
            validation_data, validation_steps = None, None
        else:
            validation_data = dev_dataset
            validation_steps = hparams.training.validation_steps

        model.fit(
            train_dataset,
            validation_data=validation_data,
            epochs=hparams.training.max_epochs,
            callbacks=callbacks,
            steps_per_epoch=hparams.training.steps_per_epoch,
            validation_steps=validation_steps,
        )

    # Run the lr finder when one of the callbacks provides it.
    lr_finders = [cb for cb in callbacks if hasattr(cb, "lr_finder_plot")]
    if not lr_finders:
        # Regular run: optionally restore from checkpoints, then save.
        checkpoint_dir = os.path.join(hparams.get_workspace_dir(),
                                      "checkpoint")
        if hparams.eval_use_best and os.path.exists(checkpoint_dir):
            logger.info(f"Load best model from {checkpoint_dir}")
            average_checkpoints(model, checkpoint_dir)

        logger.info(f'Save model in {hparams.get_model_filename()}')
        model.save_weights(hparams.get_model_filename(), save_format="tf")

        # Evaluate on the test dataset and produce reports.
        if hparams.training.do_eval:
            evaluation(hparams)
    else:
        logger.info(
            f"Do lr finder, and save result in {hparams.get_lr_finder_jpg_file()}"
        )
        finder = lr_finders[0]
        finder.lr_finder_plot(hparams.get_lr_finder_jpg_file())

    logger.info('Experiment Finish!')