Example #1
0
    def _create_callbacks(self, log_dir, init_steps, steps_per_epoch, params,
                          ckpt_mgr):
        """Creates a list of callbacks for training.

        Args:
            log_dir: Directory where TensorBoard and CSV history logs are
                written.
            init_steps: Global step the run resumes from; passed to the LR
                scheduler and log callbacks so step counts stay continuous.
            steps_per_epoch: Expected number of steps in a full epoch; used
                to detect whether an epoch finished cleanly before saving.
            params: Dict with "learning_rate", "hidden_size" and
                "learning_rate_warmup_steps" entries.
            ckpt_mgr: Checkpoint manager exposing ``save(checkpoint_number=)``.

        Returns:
            A list of Keras callbacks (LR scheduler, checkpoint saver,
            optional TensorBoard, CSV logger).
        """
        sfunc = optimizer.LearningRateFn(params["learning_rate"],
                                         params["hidden_size"],
                                         params["learning_rate_warmup_steps"])

        def _save_checkpoint(epoch, logs):
            # Only checkpoint when the epoch completed a whole number of
            # steps; an interrupted epoch would persist inconsistent state.
            if logs['steps'] % steps_per_epoch == 0:
                try:
                    ckpt_mgr.save(checkpoint_number=epoch)
                except Exception:
                    # Best-effort save: was a bare `except:` that swallowed
                    # the traceback (and would even catch KeyboardInterrupt).
                    # Log the exception details but keep training.
                    logging.warning(
                        'save model failed due to an exception. continue without saving\n',
                        exc_info=True,
                    )
            else:
                logging.warning(
                    f'not save model when training is interrupted. logs = {logs}\n'
                )

        callbacks = []
        callbacks.append(optimizer.LearningRateScheduler(sfunc, init_steps))
        callbacks.append(
            tf.keras.callbacks.LambdaCallback(on_epoch_end=_save_checkpoint))
        if self.flags_obj.enable_tensorboard:
            tensorboard_callback = utils.TensorBoardFix(
                start_step=init_steps,
                log_dir=log_dir,
                profile_batch=0,
                write_graph=False,
                update_freq=self.flags_obj.batches_between_tensorboard_log)
            callbacks.append(tensorboard_callback)
        callbacks.append(
            utils.CSVLoggerFix(f'{log_dir}/history.step-{init_steps}.log'))
        return callbacks
 def _create_callbacks(self, cur_log_dir, init_steps, params):
     """Creates a list of callbacks.

     Combines the project-default callbacks with a warmup LR scheduler
     (resumed at ``init_steps``) and a weights-only checkpoint saver
     writing per-epoch files under ``cur_log_dir``.
     """
     lr_fn = optimizer.LearningRateFn(params["learning_rate"],
                                      params["hidden_size"],
                                      params["learning_rate_warmup_steps"])
     cbs = misc.get_callbacks(params["steps_between_evals"])
     cbs.append(optimizer.LearningRateScheduler(lr_fn, init_steps))
     cbs.append(tf.keras.callbacks.ModelCheckpoint(
         os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt"),
         save_weights_only=True))
     return cbs
Example #3
0
    def _create_callbacks(self, cur_log_dir, init_steps, params):
        """Creates a list of callbacks.

        Returns, in order: a warmup LR scheduler resumed at ``init_steps``,
        a TensorBoard writer, a weights-only per-epoch checkpoint saver,
        and an appending CSV logger — all rooted at ``cur_log_dir``.
        """
        warmup_fn = optimizer.LearningRateFn(
            params["learning_rate"],
            params["hidden_size"],
            params["learning_rate_warmup_steps"])

        callbacks = [optimizer.LearningRateScheduler(warmup_fn, init_steps)]
        callbacks.append(
            tf.keras.callbacks.TensorBoard(os.path.join(cur_log_dir, "logs")))
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(
            os.path.join(cur_log_dir,
                         "weights-epoch-{epoch:02d}-loss-{loss:.4f}.hdf5"),
            save_weights_only=True))
        callbacks.append(tf.keras.callbacks.CSVLogger(
            os.path.join(cur_log_dir, "result.csv"), append=True))
        return callbacks