Example #1
    def test_minimize_fp16(self):

        optimizer = performance.configure_optimizer(
            tf.keras.optimizers.SGD(0.1), use_float16=True)
        performance.set_mixed_precision_policy(tf.float16)
        with tf.GradientTape() as tape:
            model = tf.keras.layers.Dense(2)
            outputs = model(tf.zeros((2, 2), tf.float16))
            loss = tf.reduce_mean(outputs)

        grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
                                                     model.trainable_variables)

        # Test other fp16 settings.
        def _clip_by_global_norm(grads_and_vars):
            grads, tvars = list(zip(*grads_and_vars))
            (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
            return zip(grads, tvars)

        with tf.GradientTape() as tape:
            model = tf.keras.layers.Dense(2)
            outputs = model(tf.zeros((2, 2), tf.float16))
            loss = tf.reduce_mean(outputs)
        optimizer = performance.configure_optimizer(
            tf.keras.optimizers.SGD(0.1), use_float16=True, loss_scale=128)
        grad_utils.minimize_using_explicit_allreduce(
            tape,
            optimizer,
            loss,
            model.trainable_variables,
            pre_allreduce_callbacks=[_clip_by_global_norm],
            post_allreduce_callbacks=[_clip_by_global_norm])
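
With `use_float16=True`, `configure_optimizer` effectively wraps the optimizer for loss scaling. A rough equivalent using only the public Keras API (a sketch, assuming TF 2.4+; the fixed scale of 128 mirrors `loss_scale=128` in the second call above):

import tensorflow as tf

# Dynamic loss scaling (the default when no explicit loss_scale is given).
opt_dynamic = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(0.1))

# Fixed loss scaling, comparable to passing loss_scale=128 above.
opt_fixed = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(0.1), dynamic=False, initial_scale=128)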
Example #2
    def create_optimizer(self,
                         optimizer_config: OptimizationConfig,
                         runtime_config: Optional[RuntimeConfig] = None):
        """Creates an TF optimizer from configurations.

    Args:
      optimizer_config: the parameters of the Optimization settings.
      runtime_config: the parameters of the runtime.

    Returns:
      A tf.optimizers.Optimizer object.
    """
        if (optimizer_config.optimizer.type == 'lars'
                and self.task_config.loss.l2_weight_decay > 0.0):
            raise ValueError(
                'The l2_weight_decay cannot be used together with lars '
                'optimizer. Please set it to 0.')

        opt_factory = optimization.OptimizerFactory(optimizer_config)
        optimizer = opt_factory.build_optimizer(
            opt_factory.build_learning_rate())
        # Configure the optimizer when loss_scale is set in the runtime config.
        # This helps avoid overflow/underflow in float16 computations.
        if runtime_config and runtime_config.loss_scale:
            optimizer = performance.configure_optimizer(
                optimizer,
                use_float16=runtime_config.mixed_precision_dtype == 'float16',
                loss_scale=runtime_config.loss_scale)

        return optimizer
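
A minimal sketch of how this method might be called; `task` and `exp_config` are illustrative names, assuming `task` is an instance of the class defining this method and `exp_config` is an `ExperimentConfig` whose `trainer.optimizer_config` and `runtime` fields are populated:

# Hypothetical caller; `task` and `exp_config` are assumed to exist elsewhere.
optimizer = task.create_optimizer(
    optimizer_config=exp_config.trainer.optimizer_config,
    runtime_config=exp_config.runtime)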
Example #3
    def create_optimizer(cls,
                         optimizer_config: OptimizationConfig,
                         runtime_config: Optional[RuntimeConfig] = None):
        """Creates an TF optimizer from configurations.

    Args:
      optimizer_config: the parameters of the Optimization settings.
      runtime_config: the parameters of the runtime.

    Returns:
      A tf.optimizers.Optimizer object.
    """
        opt_factory = optimization.OptimizerFactory(optimizer_config)
        optimizer = opt_factory.build_optimizer(
            opt_factory.build_learning_rate())
        # Configure the optimizer when loss_scale is set in the runtime config.
        # This helps avoid overflow/underflow in float16 computations.
        if runtime_config and runtime_config.loss_scale:
            optimizer = performance.configure_optimizer(
                optimizer,
                use_float16=runtime_config.mixed_precision_dtype == "float16",
                loss_scale=runtime_config.loss_scale,
                use_experimental_api=True)

        return optimizer
Example #4
 def _compile_model(classifier_model, train_args):
     # Create an optimizer with a learning-rate schedule.
     train_data_size = train_args['train_data_size']
     steps_per_epoch = int(train_data_size / train_args['batch_size'])
     num_train_steps = steps_per_epoch * train_args['num_train_epochs']
     warmup_steps = int(train_args['num_train_epochs'] * train_data_size *
                        0.1 / train_args['batch_size'])
     optimizer = nlp.optimization.create_optimizer(
         train_args['init_lr'],
         num_train_steps=num_train_steps,
         num_warmup_steps=warmup_steps)
     classifier_model.optimizer = performance.configure_optimizer(
         optimizer,
         use_float16=train_args['use_float16'],
         use_graph_rewrite=train_args['use_graph_rewrite'])
     # Create metrics
     metrics = [
         tf.keras.metrics.SparseCategoricalAccuracy('accuracy',
                                                    dtype=tf.float32)
     ]
     # Create loss function
     loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
     # Compile the classification model
     classifier_model.compile(optimizer=classifier_model.optimizer,
                              loss=loss,
                              metrics=metrics)
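
A sketch of the dictionary `_compile_model` expects; the keys match what the function reads above, and the values are purely illustrative (assuming `classifier_model` is a Keras model built elsewhere, e.g. with `bert_models.classifier_model`):

# Illustrative arguments only; adjust to the real dataset and hardware.
train_args = {
    'train_data_size': 100000,
    'batch_size': 32,
    'num_train_epochs': 3,
    'init_lr': 2e-5,
    'use_float16': True,
    'use_graph_rewrite': False,
}
_compile_model(classifier_model, train_args)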
Example #5
 def _get_pretrain_model():
   """Gets a pretraining model."""
   pretrain_model, core_model = bert_models.pretrain_model(
       bert_config, max_seq_length, max_predictions_per_seq)
   optimizer = optimization.create_optimizer(
       initial_lr, steps_per_epoch * epochs, warmup_steps)
   pretrain_model.optimizer = performance.configure_optimizer(
       optimizer,
       use_float16=common_flags.use_float16(),
       use_graph_rewrite=common_flags.use_graph_rewrite())
   return pretrain_model, core_model
Example #6
 def _get_pretrain_model():
   """Gets a pretraining model."""
   pretrain_model, core_model = bert_models.pretrain_model(
       bert_config, max_seq_length, max_predictions_per_seq,
       use_next_sentence_label=use_next_sentence_label)
   optimizer = optimization.create_optimizer(
       initial_lr, steps_per_epoch * epochs, warmup_steps,
       end_lr, optimizer_type)
   pretrain_model.optimizer = performance.configure_optimizer(
       optimizer,
       use_float16=common_flags.use_float16())
   return pretrain_model, core_model
Example #7
 def _get_model():
     """Gets a ner model."""
     model, core_model = (ner_model(bert_config, num_classes,
                                    FLAGS.use_crf))
     optimizer = optimization.create_optimizer(initial_lr,
                                               steps_per_epoch * epochs,
                                               warmup_steps, FLAGS.end_lr,
                                               FLAGS.optimizer_type)
     model.optimizer = performance.configure_optimizer(
         optimizer,
         use_float16=common_flags.use_float16(),
         use_graph_rewrite=common_flags.use_graph_rewrite())
     return model, core_model
Example #8
 def _get_classifier_model():
     """Gets a classifier model."""
     classifier_model, core_model = (bert_models.classifier_model(
         bert_config,
         num_classes,
         max_seq_length,
         hub_module_url=FLAGS.hub_module_url,
         hub_module_trainable=FLAGS.hub_module_trainable))
     optimizer = optimization.create_optimizer(initial_lr,
                                               steps_per_epoch * epochs,
                                               warmup_steps, FLAGS.end_lr,
                                               FLAGS.optimizer_type)
     classifier_model.optimizer = performance.configure_optimizer(
         optimizer, use_float16=common_flags.use_float16())
     return classifier_model, core_model
Example #9
    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable)
        optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                                  steps_per_epoch * epochs,
                                                  warmup_steps, FLAGS.end_lr,
                                                  FLAGS.optimizer_type)

        squad_model.optimizer = performance.configure_optimizer(
            optimizer, use_float16=common_flags.use_float16())
        return squad_model, core_model
Example #10
    def create_optimizer(self,
                         optimizer_config: OptimizationConfig,
                         runtime_config: Optional[RuntimeConfig] = None):
        """Creates an TF optimizer from configurations.

    Args:
      optimizer_config: the parameters of the Optimization settings.
      runtime_config: the parameters of the runtime.

    Returns:
      A tf.optimizers.Optimizer object.
    """
        opt_factory = optimization.YoloOptimizerFactory(optimizer_config)
        # pylint: disable=protected-access
        ema = opt_factory._use_ema
        opt_factory._use_ema = False

        opt_type = opt_factory._optimizer_type
        if opt_type == 'sgd_torch':
            optimizer = opt_factory.build_optimizer(
                opt_factory.build_learning_rate())
            optimizer.set_bias_lr(
                opt_factory.get_bias_lr_schedule(
                    self._task_config.smart_bias_lr))
            optimizer.search_and_set_variable_groups(
                self._model.trainable_variables)
        else:
            optimizer = opt_factory.build_optimizer(
                opt_factory.build_learning_rate())
        opt_factory._use_ema = ema

        if ema:
            logging.info('EMA is enabled.')
        optimizer = opt_factory.add_ema(optimizer)

        # pylint: enable=protected-access

        if runtime_config and runtime_config.loss_scale:
            use_float16 = runtime_config.mixed_precision_dtype == 'float16'
            optimizer = performance.configure_optimizer(
                optimizer,
                use_graph_rewrite=False,
                use_float16=use_float16,
                loss_scale=runtime_config.loss_scale)

        return optimizer
Example #11
 def _get_model():
     """Gets a siamese model."""
     if FLAGS.model_type == 'siamese':
         model, core_model = (siamese_bert.siamese_model(
             bert_config, num_classes, siamese_type=FLAGS.siamese_type))
     else:
         model, core_model = (bert_models.classifier_model(
             bert_config, num_classes, max_seq_length))
     optimizer = optimization.create_optimizer(initial_lr,
                                               steps_per_epoch * epochs,
                                               warmup_steps, FLAGS.end_lr,
                                               FLAGS.optimizer_type)
     model.optimizer = performance.configure_optimizer(
         optimizer,
         use_float16=common_flags.use_float16(),
         use_graph_rewrite=common_flags.use_graph_rewrite())
     return model, core_model
Example #12
  def _create_optimizer(self):
    """Creates optimizer."""
    params = self.params
    lr_schedule = optimizer.LearningRateSchedule(
        params["learning_rate"], params["hidden_size"],
        params["learning_rate_warmup_steps"])
    opt = tf.keras.optimizers.Adam(
        lr_schedule,
        params["optimizer_adam_beta1"],
        params["optimizer_adam_beta2"],
        epsilon=params["optimizer_adam_epsilon"])

    opt = performance.configure_optimizer(
        opt,
        use_float16=params["dtype"] == tf.float16,
        loss_scale=flags_core.get_loss_scale(
            self.flags_obj, default_for_fp16="dynamic"))

    return opt
Example #13
    def create_optimizer(
            cls,
            optimizer_config: OptimizationConfig,
            runtime_config: Optional[RuntimeConfig] = None,
            dp_config: Optional[DifferentialPrivacyConfig] = None):
        """Creates an TF optimizer from configurations.

    Args:
      optimizer_config: the parameters of the Optimization settings.
      runtime_config: the parameters of the runtime.
      dp_config: the parameter of differential privacy.

    Returns:
      A tf.optimizers.Optimizer object.
    """
        gradient_transformers = None
        if dp_config is not None:
            logging.info(
                "Adding differential privacy transform with config %s.",
                dp_config.as_dict())
            noise_stddev = dp_config.clipping_norm * dp_config.noise_multiplier
            gradient_transformers = [
                functools.partial(ops.clip_l2_norm,
                                  l2_norm_clip=dp_config.clipping_norm),
                functools.partial(ops.add_noise, noise_stddev=noise_stddev)
            ]

        opt_factory = optimization.OptimizerFactory(optimizer_config)
        optimizer = opt_factory.build_optimizer(
            opt_factory.build_learning_rate(),
            gradient_transformers=gradient_transformers)
        # Configure the optimizer when loss_scale is set in the runtime config.
        # This helps avoid overflow/underflow in float16 computations.
        if runtime_config:
            optimizer = performance.configure_optimizer(
                optimizer,
                use_float16=runtime_config.mixed_precision_dtype == "float16",
                loss_scale=runtime_config.loss_scale)

        return optimizer
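
The same clip-then-add-noise idea can be expressed with stock Keras gradient transformers (TF 2.4+, pre-Keras-3 optimizers). A minimal sketch with illustrative constants, not the `ops` helpers used above:

import tensorflow as tf

CLIP_NORM = 1.0                  # stands in for dp_config.clipping_norm
NOISE_STDDEV = CLIP_NORM * 0.5   # clipping_norm * noise_multiplier

def clip_l2_norm(grads_and_vars):
  # Clip all gradients jointly to a global L2 norm.
  grads, tvars = zip(*grads_and_vars)
  clipped, _ = tf.clip_by_global_norm(list(grads), CLIP_NORM)
  return list(zip(clipped, tvars))

def add_noise(grads_and_vars):
  # Add Gaussian noise to each (dense) gradient.
  return [(g + tf.random.normal(tf.shape(g), stddev=NOISE_STDDEV), v)
          for g, v in grads_and_vars]

optimizer = tf.keras.optimizers.Adam(
    1e-3, gradient_transformers=[clip_l2_norm, add_noise])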
Example #14
def train_and_eval(
    params: base_configs.ExperimentConfig,
    strategy_override: tf.distribute.Strategy) -> Mapping[str, Any]:
  """Runs the train and eval path using compile/fit."""
  logging.info('Running train and eval.')

  distribute_utils.configure_cluster(params.runtime.worker_hosts,
                                     params.runtime.task_index)

  # Note: for TPUs, the strategy and scope should be created before the dataset.
  strategy = strategy_override or distribute_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)

  strategy_scope = distribute_utils.get_strategy_scope(strategy)

  logging.info('Detected %d devices.',
               strategy.num_replicas_in_sync if strategy else 1)

  label_smoothing = params.model.loss.label_smoothing
  one_hot = label_smoothing and label_smoothing > 0

  builders = _get_dataset_builders(params, strategy, one_hot)
  datasets = [
      builder.build(strategy) if builder else None for builder in builders
  ]

  # Unpack datasets and builders based on train/val/test splits
  train_builder, validation_builder = builders  # pylint: disable=unbalanced-tuple-unpacking
  train_dataset, validation_dataset = datasets

  train_epochs = params.train.epochs
  train_steps = params.train.steps or train_builder.num_steps
  validation_steps = params.evaluation.steps or validation_builder.num_steps

  initialize(params, train_builder)

  logging.info('Global batch size: %d', train_builder.global_batch_size)

  with strategy_scope:
    model_params = params.model.model_params.as_dict()
    model = get_models()[params.model.name](**model_params)
    learning_rate = optimizer_factory.build_learning_rate(
        params=params.model.learning_rate,
        batch_size=train_builder.global_batch_size,
        train_epochs=train_epochs,
        train_steps=train_steps)
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=params.model.optimizer.name,
        base_learning_rate=learning_rate,
        params=params.model.optimizer.as_dict(),
        model=model)
    optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=train_builder.dtype == 'float16',
        loss_scale=get_loss_scale(params))

    metrics_map = _get_metrics(one_hot)
    metrics = [metrics_map[metric] for metric in params.train.metrics]
    steps_per_loop = train_steps if params.train.set_epoch_loop else 1

    if one_hot:
      loss_obj = tf.keras.losses.CategoricalCrossentropy(
          label_smoothing=params.model.loss.label_smoothing)
    else:
      loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
    model.compile(
        optimizer=optimizer,
        loss=loss_obj,
        metrics=metrics,
        steps_per_execution=steps_per_loop)

    initial_epoch = 0
    if params.train.resume_checkpoint:
      initial_epoch = resume_from_checkpoint(
          model=model, model_dir=params.model_dir, train_steps=train_steps)

    callbacks = custom_callbacks.get_callbacks(
        model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
        include_tensorboard=params.train.callbacks.enable_tensorboard,
        time_history=params.train.callbacks.enable_time_history,
        track_lr=params.train.tensorboard.track_lr,
        write_model_weights=params.train.tensorboard.write_model_weights,
        initial_step=initial_epoch * train_steps,
        batch_size=train_builder.global_batch_size,
        log_steps=params.train.time_history.log_steps,
        model_dir=params.model_dir,
        backup_and_restore=params.train.callbacks.enable_backup_and_restore)

  serialize_config(params=params, model_dir=params.model_dir)

  if params.evaluation.skip_eval:
    validation_kwargs = {}
  else:
    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': params.evaluation.epochs_between_evals,
    }

  history = model.fit(
      train_dataset,
      epochs=train_epochs,
      steps_per_epoch=train_steps,
      initial_epoch=initial_epoch,
      callbacks=callbacks,
      verbose=2,
      **validation_kwargs)

  validation_output = None
  if not params.evaluation.skip_eval:
    validation_output = model.evaluate(
        validation_dataset, steps=validation_steps, verbose=2)

  # TODO(dankondratyuk): eval and save final test accuracy
  stats = common.build_stats(history, validation_output, callbacks)
  return stats
Example #15
    def __init__(self, flags_obj, time_callback, epoch_steps):
        self.strategy = tf.distribute.get_strategy()
        self.flags_obj = flags_obj
        self.dtype = flags_core.get_tf_dtype(flags_obj)
        self.time_callback = time_callback

        # Input pipeline related
        batch_size = flags_obj.batch_size
        if batch_size % self.strategy.num_replicas_in_sync != 0:
            raise ValueError(
                'Batch size must be divisible by the number of replicas: {}'
                .format(self.strategy.num_replicas_in_sync))

        # Automatic rebatching is not supported by the
        # `distribute_datasets_from_function()` API, which is required when
        # cloning the dataset to multiple workers in eager mode, so we use the
        # per-replica batch size.
        self.batch_size = int(batch_size / self.strategy.num_replicas_in_sync)

        if self.flags_obj.use_synthetic_data:
            self.input_fn = common.get_synth_input_fn(
                height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
                width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
                num_channels=imagenet_preprocessing.NUM_CHANNELS,
                num_classes=imagenet_preprocessing.NUM_CLASSES,
                dtype=self.dtype,
                drop_remainder=True)
        else:
            self.input_fn = imagenet_preprocessing.input_fn

        self.model = resnet_model.resnet50(
            num_classes=imagenet_preprocessing.NUM_CLASSES,
            use_l2_regularizer=not flags_obj.single_l2_loss_op)

        lr_schedule = common.PiecewiseConstantDecayWithWarmup(
            batch_size=flags_obj.batch_size,
            epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
            warmup_epochs=common.LR_SCHEDULE[0][1],
            boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
            multipliers=list(p[0] for p in common.LR_SCHEDULE),
            compute_lr_on_cpu=True)
        self.optimizer = common.get_optimizer(lr_schedule)
        # Make sure iterations variable is created inside scope.
        self.global_step = self.optimizer.iterations

        use_graph_rewrite = flags_obj.fp16_implementation == 'graph_rewrite'
        if use_graph_rewrite and not flags_obj.use_tf_function:
            raise ValueError('--fp16_implementation=graph_rewrite requires '
                             '--use_tf_function to be true')
        self.optimizer = performance.configure_optimizer(
            self.optimizer,
            use_float16=self.dtype == tf.float16,
            use_graph_rewrite=use_graph_rewrite,
            loss_scale=flags_core.get_loss_scale(flags_obj,
                                                 default_for_fp16=128))

        self.train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            'train_accuracy', dtype=tf.float32)
        self.test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            'test_accuracy', dtype=tf.float32)

        self.checkpoint = tf.train.Checkpoint(model=self.model,
                                              optimizer=self.optimizer)

        # Handling epochs.
        self.epoch_steps = epoch_steps
        self.epoch_helper = orbit.utils.EpochHelper(epoch_steps,
                                                    self.global_step)
        train_dataset = orbit.utils.make_distributed_dataset(
            self.strategy,
            self.input_fn,
            is_training=True,
            data_dir=self.flags_obj.data_dir,
            batch_size=self.batch_size,
            parse_record_fn=imagenet_preprocessing.parse_record,
            datasets_num_private_threads=self.flags_obj.
            datasets_num_private_threads,
            dtype=self.dtype,
            drop_remainder=True)
        orbit.StandardTrainer.__init__(
            self,
            train_dataset,
            options=orbit.StandardTrainerOptions(
                use_tf_while_loop=flags_obj.use_tf_while_loop,
                use_tf_function=flags_obj.use_tf_function))
        if not flags_obj.skip_eval:
            eval_dataset = orbit.utils.make_distributed_dataset(
                self.strategy,
                self.input_fn,
                is_training=False,
                data_dir=self.flags_obj.data_dir,
                batch_size=self.batch_size,
                parse_record_fn=imagenet_preprocessing.parse_record,
                dtype=self.dtype)
            orbit.StandardEvaluator.__init__(
                self,
                eval_dataset,
                options=orbit.StandardEvaluatorOptions(
                    use_tf_function=flags_obj.use_tf_function))
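
When the float16 path above yields a loss-scale optimizer, a custom training step typically scales the loss before differentiation and unscales the gradients afterwards. A minimal sketch, assuming `model` and `optimizer` as configured in the trainer above and a `(features, labels)` batch from the training dataset:

import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

@tf.function
def train_step(features, labels):
  with tf.GradientTape() as tape:
    logits = model(features, training=True)
    loss = loss_fn(labels, logits)
    # Scale the loss so small float16 gradients do not underflow.
    scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
  grads = optimizer.get_unscaled_gradients(scaled_grads)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss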
Example #16
    def __init__(self,
                 config: ExperimentConfig,
                 task: base_task.Task,
                 train: bool = True,
                 evaluate: bool = True,
                 model=None,
                 optimizer=None,
                 checkpoint_exporter=None):
        """Initialize common trainer for TensorFlow models.

    Args:
      config: An `ExperimentConfig` instance specifying experiment config.
      task: A base_task.Task instance.
      train: bool, whether or not this trainer will be used for training.
        default to True.
      evaluate: bool, whether or not this trainer will be used for evaluation.
        default to True.
      model: tf.keras.Model instance. If provided, it will be used instead of
        building model using task.build_model(). Default to None.
      optimizer: tf.keras.optimizers.Optimizer instance. If provided, it will
        used instead of the optimizer from config. Default to None.
      checkpoint_exporter: an object that has the `maybe_export_checkpoint`
        interface.
    """
        # Gets the current distribution strategy. If not inside any strategy scope,
        # it gets a single-replica no-op strategy.
        self._strategy = tf.distribute.get_strategy()
        self._config = config
        self._task = task

        self._model = model or task.build_model()

        if optimizer is None:
            opt_factory = optimization.OptimizerFactory(
                config.trainer.optimizer_config)
            self._optimizer = opt_factory.build_optimizer(
                opt_factory.build_learning_rate())
        else:
            self._optimizer = optimizer

        self._checkpoint_exporter = checkpoint_exporter

        # Configure the optimizer when loss_scale is set in the runtime config.
        # This helps avoid overflow/underflow in float16 computations.
        if config.runtime.loss_scale:
            self._optimizer = performance.configure_optimizer(
                self._optimizer,
                use_float16=config.runtime.mixed_precision_dtype == 'float16',
                loss_scale=config.runtime.loss_scale)

        # global_step increases by 1 after each training iteration.
        # We should have global_step.numpy() == self.optimizer.iterations.numpy()
        # when there is only 1 optimizer.
        self._global_step = orbit.utils.create_global_step()
        if hasattr(self.model, 'checkpoint_items'):
            checkpoint_items = self.model.checkpoint_items
        else:
            checkpoint_items = {}
        self._checkpoint = tf.train.Checkpoint(global_step=self.global_step,
                                               model=self.model,
                                               optimizer=self.optimizer,
                                               **checkpoint_items)

        self._train_loss = tf.keras.metrics.Mean('training_loss',
                                                 dtype=tf.float32)
        self._validation_loss = tf.keras.metrics.Mean('validation_loss',
                                                      dtype=tf.float32)
        self._train_metrics = self.task.build_metrics(
            training=True) + self.model.metrics
        self._validation_metrics = self.task.build_metrics(
            training=False) + self.model.metrics

        if train:
            train_dataset = orbit.utils.make_distributed_dataset(
                self.strategy, self.task.build_inputs,
                self.config.task.train_data)
            orbit.StandardTrainer.__init__(
                self,
                train_dataset,
                options=orbit.StandardTrainerOptions(
                    use_tf_while_loop=config.trainer.train_tf_while_loop,
                    use_tf_function=config.trainer.train_tf_function,
                    use_tpu_summary_optimization=config.trainer.
                    allow_tpu_summary))

        if evaluate:
            eval_dataset = orbit.utils.make_distributed_dataset(
                self.strategy, self.task.build_inputs,
                self.config.task.validation_data)
            orbit.StandardEvaluator.__init__(
                self,
                eval_dataset,
                options=orbit.StandardEvaluatorOptions(
                    use_tf_function=config.trainer.eval_tf_function))
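
The trainer above only wraps the optimizer for loss scaling; for float16 compute to take effect, the Keras mixed-precision policy is normally set before the model is built. A minimal sketch, assuming the config uses `mixed_precision_dtype='float16'`:

import tensorflow as tf

# Set the global policy before task.build_model() so layers compute in
# float16 while variables stay in float32.
tf.keras.mixed_precision.set_global_policy('mixed_float16')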