def _create_optimizer(self):
  """Creates optimizer."""
  params = self.params
  # TODO(b/139414679): Explore the difference between using
  # LearningRateSchedule and callback for GPU runs, and try to merge them.
  lr_schedule = optimizer.LearningRateSchedule(
      params["learning_rate"], params["hidden_size"],
      params["learning_rate_warmup_steps"])
  opt = tf.keras.optimizers.Adam(
      lr_schedule if self.use_tpu else params["learning_rate"],
      params["optimizer_adam_beta1"],
      params["optimizer_adam_beta2"],
      epsilon=params["optimizer_adam_epsilon"])

  if params["dtype"] == tf.float16:
    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        opt, loss_scale=flags_core.get_loss_scale(self.flags_obj,
                                                  default_for_fp16="dynamic"))
  if self.flags_obj.fp16_implementation == "graph_rewrite":
    # Note: when flags_obj.fp16_implementation == "graph_rewrite", the dtype
    # returned by flags_core.get_tf_dtype(flags_obj) is 'float32', which
    # ensures tf.compat.v2.keras.mixed_precision and
    # tf.train.experimental.enable_mixed_precision_graph_rewrite do not both
    # apply mixed precision.
    opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(opt)

  return opt
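
The optimizer.LearningRateSchedule used above comes from the Model Garden's Transformer code and applies the usual Transformer warmup-then-inverse-square-root decay. Below is a minimal, self-contained sketch of such a schedule; the class name and exact arithmetic are assumptions based on the common Transformer formula, not a copy of the Model Garden implementation.

import tensorflow as tf


class TransformerLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Warmup then reciprocal-square-root decay (illustrative sketch only)."""

  def __init__(self, initial_learning_rate, hidden_size, warmup_steps):
    super().__init__()
    self.initial_learning_rate = initial_learning_rate
    self.hidden_size = hidden_size
    self.warmup_steps = float(warmup_steps)

  def __call__(self, step):
    step = tf.cast(step, tf.float32)
    # Scale the base rate by hidden_size**-0.5, as in the original paper.
    lr = self.initial_learning_rate * (self.hidden_size ** -0.5)
    # Linear warmup for the first warmup_steps steps ...
    lr *= tf.minimum(1.0, step / self.warmup_steps)
    # ... then decay proportionally to 1/sqrt(step).
    return lr / tf.sqrt(tf.maximum(step, self.warmup_steps))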
Example #2
def _create_optimizer(self):
  """Creates optimizer."""
  params = self.params
  # TODO(b/139414679): Explore the difference between using
  # LearningRateSchedule and callback for GPU runs, and try to merge them.
  lr_schedule = optimizer.LearningRateSchedule(
      params["learning_rate"], params["hidden_size"],
      params["learning_rate_warmup_steps"])
  opt = tf.keras.optimizers.Adam(
      lr_schedule if self.use_tpu else params["learning_rate"],
      params["optimizer_adam_beta1"],
      params["optimizer_adam_beta2"],
      epsilon=params["optimizer_adam_epsilon"])
  return opt
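
A detail this shorter variant relies on: tf.keras.optimizers.Adam accepts either a plain float or a tf.keras.optimizers.schedules.LearningRateSchedule as its first argument, which is what lets the code pass lr_schedule on TPU and the raw learning_rate otherwise. A minimal sketch with illustrative hyperparameter values (the numbers and the schedule are placeholders, not the Transformer defaults):

import tensorflow as tf

# A stand-in schedule; the real code uses the Transformer warmup schedule.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=2.0, decay_steps=1000, decay_rate=0.96)

# Adam takes either a fixed float ...
opt_fixed = tf.keras.optimizers.Adam(2.0, beta_1=0.9, beta_2=0.997,
                                     epsilon=1e-9)
# ... or a LearningRateSchedule, evaluated at each optimizer step.
opt_scheduled = tf.keras.optimizers.Adam(schedule, beta_1=0.9, beta_2=0.997,
                                         epsilon=1e-9)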
Example #3
def _create_optimizer(self):
  """Creates optimizer."""
  params = self.params
  # TODO(b/139414679): Explore the difference between using
  # LearningRateSchedule and callback for GPU runs, and try to merge them.
  lr_schedule = optimizer.LearningRateSchedule(
      params["learning_rate"], params["hidden_size"],
      params["learning_rate_warmup_steps"])
  opt = tf.keras.optimizers.Adam(
      lr_schedule if self.use_tpu else params["learning_rate"],
      params["optimizer_adam_beta1"],
      params["optimizer_adam_beta2"],
      epsilon=params["optimizer_adam_epsilon"])
  if params["dtype"] == tf.float16:
    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        opt, loss_scale=flags_core.get_loss_scale(self.flags_obj,
                                                  default_for_fp16="dynamic"))
  return opt
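
When params["dtype"] is tf.float16, wrapping Adam in a LossScaleOptimizer keeps small fp16 gradients from underflowing. With Keras model.fit the wrapper is transparent; in a custom training loop the loss is scaled before differentiation and the gradients are unscaled before the update. A hedged sketch of that pattern using the newer, non-experimental tf.keras.mixed_precision API (model, loss_fn, x and y are placeholders supplied by the caller):

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam(), dynamic=True)

@tf.function
def train_step(model, loss_fn, x, y):
  with tf.GradientTape() as tape:
    loss = loss_fn(y, model(x, training=True))
    # Scale the loss up so fp16 gradients do not underflow.
    scaled_loss = opt.get_scaled_loss(loss)
  scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
  # Undo the scaling before applying the update.
  grads = opt.get_unscaled_gradients(scaled_grads)
  opt.apply_gradients(zip(grads, model.trainable_variables))
  return loss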