def test_validate_callbacks_predefined_callbacks(self, mock_warning):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]

    distributed_training_utils.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]

    for callback in unsupported_predefined_callbacks:
      with self.assertRaisesRegexp(
          ValueError, 'You must specify a Keras Optimizer V2'):
        distributed_training_utils.validate_callbacks([callback],
                                                      v1_adam.AdamOptimizer())

    self.assertEqual(0, mock_warning.call_count)
예제 #2
0
  def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]

    distributed_training_utils.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]

    for callback in unsupported_predefined_callbacks:
      with self.assertRaisesRegex(ValueError,
                                  'You must specify a Keras Optimizer V2'):
        distributed_training_utils.validate_callbacks([callback],
                                                      v1_adam.AdamOptimizer())
  def test_validate_callbacks_custom_callback(self, mock_warning):

    class CustomCallback(callbacks.Callback):
      pass

    distributed_training_utils.validate_callbacks([CustomCallback()],
                                                  adam.Adam())

    self.assertEqual(1, mock_warning.call_count)

    call_args, call_kwargs = mock_warning.call_args

    self.assertEqual(('Your input callback is not one of the predefined '
                      'Callbacks that supports DistributionStrategy. You '
                      'might encounter an error if you access one of the '
                      'model\'s attributes as part of the callback since '
                      'these attributes are not set. You can access each of '
                      'the individual distributed models using the '
                      '`_grouped_model` attribute of your original model.',),
                     call_args)

    self.assertEqual(0, len(call_kwargs))
예제 #4
0
  def _model_iteration(
      self, model, mode, x=None, y=None, batch_size=None, verbose=1,
      sample_weight=None, steps=None, callbacks=None, max_queue_size=10,
      workers=1, use_multiprocessing=False, **kwargs):

    batch_size = model._validate_or_infer_batch_size(
        batch_size, steps, x)
    strategy = _get_distribution_strategy(model)
    batch_size, steps = dist_utils.process_batch_and_step_size(
        strategy, x, batch_size, steps, mode)
    dist_utils.validate_callbacks(input_callbacks=callbacks,
                                  optimizer=model.optimizer)
    # Enter tf.distribute.Strategy scope.
    with strategy.scope():
      adapter = _process_inputs(
          model,
          mode,
          x,
          y,
          batch_size=batch_size,
          sample_weights=sample_weight,
          steps=steps,
          distribution_strategy=strategy,
          max_queue_size=max_queue_size,
          workers=workers,
          use_multiprocessing=use_multiprocessing)
      total_samples = _get_total_number_of_samples(adapter)
      use_sample = total_samples is not None
      dataset = adapter.get_dataset()

      if not steps:
        # Raise an error if `steps` isn't specified but the dataset
        # is infinite.
        steps = adapter.get_size() or training_utils.infer_steps_for_dataset(
            model, dataset, steps, steps_name='steps')

      # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
      training_context = TrainingContext()
      dataset = strategy.experimental_distribute_dataset(dataset)

      execution_function = training_v2_utils._get_or_make_execution_function(
          model, mode)

      data_iterator = iter(dataset)

      callbacks = cbks.configure_callbacks(
          callbacks,
          model,
          do_validation=False,
          batch_size=batch_size,
          epochs=1,
          steps_per_epoch=steps,
          samples=use_sample,
          count_mode='samples' if use_sample else 'steps',
          verbose=0,  # Handle ProgBarLogger separately in this loop.
          mode=mode)

      with training_context.on_start(
          model, callbacks, use_sample, verbose, mode):
        with training_context.on_epoch(0, mode) as epoch_logs:
          model.reset_metrics()
          result = run_one_epoch(
              model,
              data_iterator,
              execution_function,
              dataset_size=adapter.get_size(),
              batch_size=adapter.batch_size(),
              strategy=strategy,
              steps_per_epoch=steps,
              num_samples=total_samples,
              mode=mode,
              training_context=training_context,
              total_epochs=1)
          cbks.make_logs(model, epoch_logs, result, mode)

    if len(result) == 1:
      result = result[0]
    return result
예제 #5
0
  def fit(
      self, model, x=None, y=None, batch_size=None, epochs=1, verbose=1,
      callbacks=None, validation_split=0., validation_data=None, shuffle=True,
      class_weight=None, sample_weight=None, initial_epoch=0,
      steps_per_epoch=None, validation_steps=None, validation_freq=1,
      max_queue_size=10, workers=1, use_multiprocessing=False, **kwargs):
    batch_size = model._validate_or_infer_batch_size(
        batch_size, steps_per_epoch, x)

    strategy = _get_distribution_strategy(model)
    batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
        strategy,
        x,
        batch_size,
        steps_per_epoch,
        ModeKeys.TRAIN,
        validation_split=validation_split)
    dist_utils.validate_callbacks(input_callbacks=callbacks,
                                  optimizer=model.optimizer)
    # Enter tf.distribute.Strategy scope.
    with strategy.scope():
      training_data_adapter, validation_adapter = _process_training_inputs(
          model,
          x,
          y,
          batch_size=batch_size,
          epochs=epochs,
          sample_weights=sample_weight,
          class_weights=class_weight,
          validation_split=validation_split,
          steps_per_epoch=steps_per_epoch,
          shuffle=shuffle,
          validation_data=validation_data,
          validation_steps=validation_steps,
          distribution_strategy=strategy,
          max_queue_size=max_queue_size,
          workers=workers,
          use_multiprocessing=use_multiprocessing)

      total_samples = _get_total_number_of_samples(training_data_adapter)
      use_sample = total_samples is not None
      do_validation = (validation_adapter is not None)

      recreate_training_iterator = (
          training_data_adapter.should_recreate_iterator(steps_per_epoch))
      if not steps_per_epoch:
        # TODO(b/139762795): Add step inference for when steps is None to
        # prevent end of sequence warning message.
        steps_per_epoch = training_data_adapter.get_size()

      # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
      training_context = TrainingContext()

      training_dataset = training_data_adapter.get_dataset()
      # Raise an error if steps_per_epoch isn't specified but the dataset
      # is infinite.
      # TODO(scottzhu): This check should probably happen in the adapter
      inferred_steps = training_utils.infer_steps_for_dataset(
          model,
          training_dataset,
          steps_per_epoch,
          steps_name='steps_per_epoch',
          epochs=0)

      steps_per_epoch = (
          inferred_steps if steps_per_epoch is None else steps_per_epoch)

      training_dataset = strategy.experimental_distribute_dataset(
          training_dataset)

      training_function = training_v2_utils._get_or_make_execution_function(
          model, ModeKeys.TRAIN)

      training_data_iter = None
      if do_validation:
        validation_dataset = validation_adapter.get_dataset()
        if not validation_steps:
          # Raise an error if validation_steps isn't specified but the
          # validation dataset is infinite.
          validation_steps = (
              validation_adapter.get_size() or
              training_utils.infer_steps_for_dataset(
                  model,
                  validation_dataset,
                  validation_steps,
                  steps_name='validation_steps'))
        eval_function = training_v2_utils._get_or_make_execution_function(
            model, ModeKeys.TEST)
        eval_data_iter = None
        validation_dataset = strategy.experimental_distribute_dataset(
            validation_dataset)
        val_total_samples = _get_total_number_of_samples(validation_adapter)
      else:
        val_total_samples = None

      if verbose and (total_samples or steps_per_epoch):
        _print_train_info(total_samples, steps_per_epoch, val_total_samples,
                          validation_steps)

      training_callbacks = cbks.configure_callbacks(
          callbacks,
          model,
          do_validation=do_validation,
          batch_size=batch_size,
          epochs=epochs,
          steps_per_epoch=steps_per_epoch,
          samples=total_samples or steps_per_epoch,
          count_mode='samples' if use_sample else 'steps',
          verbose=0,  # Handle ProgBarLogger separately in this loop.
          mode=ModeKeys.TRAIN)

      with training_context.on_start(model, training_callbacks, use_sample,
                                     verbose, ModeKeys.TRAIN):

        initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
            initial_epoch, ModeKeys.TRAIN)

        for epoch in range(initial_epoch, epochs):
          if training_context.callbacks.model.stop_training:
            break

          # Training
          with training_context.on_epoch(epoch, ModeKeys.TRAIN) as epoch_logs:
            model.reset_metrics()
            if training_data_iter is None or recreate_training_iterator:
              if (training_data_iter is not None and
                  distribution_strategy_context.has_strategy()):
                # TODO(kaftan): remove this when MultiDeviceIterator is a
                ## compositetensor (unless this is more efficient)
                training_data_iter._initializer  # pylint: disable=pointless-statement
              else:
                training_data_iter = iter(training_dataset)

            training_result = run_one_epoch(
                model,
                training_data_iter,
                training_function,
                dataset_size=training_data_adapter.get_size(),
                batch_size=training_data_adapter.batch_size(),
                strategy=strategy,
                steps_per_epoch=steps_per_epoch,
                num_samples=total_samples,
                mode=ModeKeys.TRAIN,
                training_context=training_context,
                total_epochs=epochs)
            cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)

            # In the case of steps_per_epoch = None, the final cardinality will
            # be determined when the inputs are fully consumed (eg dataset or
            # generator). Update the steps_per_epoch to the new value.
            if (steps_per_epoch is None
                and training_context.progbar.progbar.target is not None):
              steps_per_epoch = training_context.progbar.progbar.target

            # Evaluation
            if (do_validation and
                training_utils.should_run_validation(validation_freq, epoch) and
                not training_callbacks.model.stop_training):
              if (eval_data_iter is not None and
                  distribution_strategy_context.has_strategy()):
                # TODO(kaftan): remove this when MultiDeviceIterator is a
                ## compositetensor (unless this is more efficient)
                eval_data_iter._initializer  # pylint: disable=pointless-statement
              else:
                eval_data_iter = iter(validation_dataset)

              validation_callbacks = cbks.configure_callbacks(
                  training_callbacks,
                  model,
                  batch_size=batch_size,
                  epochs=1,
                  steps_per_epoch=validation_steps,
                  samples=val_total_samples or validation_steps,
                  count_mode='samples' if use_sample else 'steps',
                  verbose=0,  # Handle ProgBarLogger separately in this loop.
                  mode=ModeKeys.TEST)

              eval_context = TrainingContext()
              with eval_context.on_start(
                  model,
                  validation_callbacks,
                  use_sample,
                  verbose=0,
                  mode=ModeKeys.TEST):
                with eval_context.on_epoch(epoch, ModeKeys.TEST):
                  model.reset_metrics()
                  eval_result = run_one_epoch(
                      model,
                      eval_data_iter,
                      eval_function,
                      dataset_size=validation_adapter.get_size(),
                      batch_size=validation_adapter.batch_size(),
                      strategy=strategy,
                      steps_per_epoch=validation_steps,
                      num_samples=val_total_samples,
                      mode=ModeKeys.TEST,
                      training_context=eval_context,
                      total_epochs=1)
                  cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,
                                 prefix='val_')

    return model.history
예제 #6
0
    def fit(self,
            model,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            **kwargs):
        batch_size = model._validate_or_infer_batch_size(
            batch_size, steps_per_epoch, x)

        strategy = _get_distribution_strategy(model)
        batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
            strategy, x, batch_size, steps_per_epoch, ModeKeys.TRAIN)
        dist_utils.validate_callbacks(input_callbacks=callbacks,
                                      optimizer=model.optimizer)
        # Enter tf.distribute.Strategy scope.
        with strategy.scope():
            training_data_adapter, validation_adapter = _process_training_inputs(
                model,
                x,
                y,
                batch_size=batch_size,
                sample_weights=sample_weight,
                class_weights=class_weight,
                validation_split=validation_split,
                steps_per_epoch=steps_per_epoch,
                shuffle=shuffle,
                validation_data=validation_data,
                validation_steps=validation_steps,
                distribution_strategy=strategy)

            total_samples = _get_total_number_of_samples(training_data_adapter)
            use_sample = total_samples is not None
            do_validation = (validation_adapter is not None)

            if not steps_per_epoch:
                steps_per_epoch = training_data_adapter.get_size()

            # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
            training_context = TrainingContext()

            initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
                initial_epoch, ModeKeys.TRAIN)

            training_dataset = training_data_adapter.get_dataset()
            # Raise an error if steps_per_epoch isn't specified but the dataset
            # is infinite.
            # TODO(scottzhu): This check should probably happen in the adapter
            training_utils.infer_steps_for_dataset(
                training_dataset,
                steps_per_epoch,
                steps_name='steps_per_epoch',
                epochs=0)

            training_dataset = strategy.experimental_distribute_dataset(
                training_dataset)

            _update_sample_weight_mode(model, ModeKeys.TRAIN, training_dataset)
            training_function = training_v2_utils._get_or_make_execution_function(
                model, ModeKeys.TRAIN)

            training_data_iter = None
            # Only recreate iterator when the data has a fixed length, which will be
            # fully consumed every epoch, or has a unknown length (dataset, generator)
            # and will be fully consumed (steps_per_epoch is None)
            recreate_training_iterator = (training_data_adapter.get_size()
                                          is not None
                                          or steps_per_epoch is None)

            if do_validation:
                if not validation_steps:
                    validation_steps = validation_adapter.get_size()
                eval_function = training_v2_utils._get_or_make_execution_function(
                    model, ModeKeys.TEST)
                eval_data_iter = None

                validation_dataset = validation_adapter.get_dataset()
                # Raise an error if validation_steps isn't specified but the validation
                # dataset is infinite.
                # TODO(scottzhu): This check should probably happen in the adapter
                training_utils.infer_steps_for_dataset(
                    validation_dataset,
                    validation_steps,
                    steps_name='validation_steps',
                    epochs=0)
                validation_dataset = strategy.experimental_distribute_dataset(
                    validation_dataset)

            callbacks = cbks.configure_callbacks(
                callbacks,
                model,
                do_validation=do_validation,
                batch_size=batch_size,
                epochs=epochs,
                steps_per_epoch=steps_per_epoch,
                samples=total_samples,
                count_mode='samples' if use_sample else 'steps',
                verbose=0,  # Handle ProgBarLogger separately in this loop.
                mode=ModeKeys.TRAIN)

            with training_context.on_start(model, callbacks, use_sample,
                                           verbose, ModeKeys.TRAIN):
                # TODO(scottzhu): Handle TPUStrategy training loop
                for epoch in range(initial_epoch, epochs):
                    if training_context.callbacks.model.stop_training:
                        break

                    # Training
                    with training_context.on_epoch(
                            epoch, ModeKeys.TRAIN) as epoch_logs:
                        model.reset_metrics()
                        if training_data_iter is None or recreate_training_iterator:
                            if (training_data_iter is not None
                                    and distribution_strategy_context.
                                    has_strategy()):
                                # TODO(kaftan): remove this when MultiDeviceIterator is a
                                ## compositetensor (unless this is more efficient)
                                training_data_iter._initializer  # pylint: disable=pointless-statement
                            else:
                                training_data_iter = iter(training_dataset)

                        training_result = run_one_epoch(
                            model,
                            training_data_iter,
                            training_function,
                            dataset_size=training_data_adapter.get_size(),
                            batch_size=training_data_adapter.batch_size(),
                            strategy=strategy,
                            steps_per_epoch=steps_per_epoch,
                            num_samples=total_samples,
                            mode=ModeKeys.TRAIN,
                            training_context=training_context,
                            total_epochs=epochs)
                        cbks.make_logs(model, epoch_logs, training_result,
                                       ModeKeys.TRAIN)

                        # Evaluation
                        if (do_validation
                                and training_utils.should_run_validation(
                                    validation_freq, epoch)
                                and not callbacks.model.stop_training):
                            if (eval_data_iter is not None
                                    and distribution_strategy_context.
                                    has_strategy()):
                                # TODO(kaftan): remove this when MultiDeviceIterator is a
                                ## compositetensor (unless this is more efficient)
                                eval_data_iter._initializer  # pylint: disable=pointless-statement
                            else:
                                eval_data_iter = iter(validation_dataset)

                            val_total_samples = _get_total_number_of_samples(
                                validation_adapter)
                            eval_context = TrainingContext()
                            with eval_context.on_start(model,
                                                       callbacks,
                                                       use_sample,
                                                       verbose=0,
                                                       mode=ModeKeys.TEST):
                                with eval_context.on_epoch(
                                        epoch, ModeKeys.TEST):
                                    model.reset_metrics()
                                    eval_result = run_one_epoch(
                                        model,
                                        eval_data_iter,
                                        eval_function,
                                        dataset_size=validation_adapter.
                                        get_size(),
                                        batch_size=validation_adapter.
                                        batch_size(),
                                        strategy=strategy,
                                        steps_per_epoch=validation_steps,
                                        num_samples=val_total_samples,
                                        mode=ModeKeys.TEST,
                                        training_context=eval_context,
                                        total_epochs=1)
                                    cbks.make_logs(model,
                                                   epoch_logs,
                                                   eval_result,
                                                   ModeKeys.TEST,
                                                   prefix='val_')

        return model.history
예제 #7
0
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    # Until support for partial batch is implemented across all
    # functions and distribution strategy, we pass `mode` to selectively
    # relax the costraint to consume all the training samples.
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, mode=ModeKeys.TRAIN))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      validation_split=validation_split,
      shuffle=shuffle,
      epochs=epochs)
  if not distributed_training_utils.is_distributing_by_cloning(model):
    with model._distribution_strategy.scope():
      (dataset, _, _) = model._standardize_user_data(
          dataset,
          sample_weight=sample_weight,
          class_weight=class_weight,
          batch_size=batch_size,
          validation_split=validation_split,
          shuffle=shuffle)

  val_dataset = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(val_x, val_y)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size, mode=ModeKeys.TEST)
    val_dataset = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        validation_split=validation_split,
        shuffle=shuffle,
        allow_partial_batch=True)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        dataset,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_dataset=val_dataset,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq)
  else:
    return training_arrays.fit_loop(
        model,
        dataset,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_dataset,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq,
        steps_name='steps_per_epoch')
예제 #8
0
    def fit(self,
            model,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            **kwargs):
        """Fit loop for Distribution Strategies."""
        dist_utils.validate_callbacks(input_callbacks=callbacks,
                                      optimizer=model.optimizer)
        dist_utils.validate_inputs(x, y)

        batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
            model._distribution_strategy, x, batch_size, steps_per_epoch,
            ModeKeys.TRAIN)
        batch_size = model._validate_or_infer_batch_size(
            batch_size, steps_per_epoch, x)
        dataset = model._distribution_standardize_user_data(
            x,
            y,
            sample_weight=sample_weight,
            class_weight=class_weight,
            batch_size=batch_size,
            validation_split=validation_split,
            shuffle=shuffle,
            epochs=epochs)
        if not dist_utils.is_distributing_by_cloning(model):
            with model._distribution_strategy.scope():
                (dataset, _, _) = model._standardize_user_data(
                    dataset,
                    sample_weight=sample_weight,
                    class_weight=class_weight,
                    batch_size=batch_size,
                    validation_split=validation_split,
                    shuffle=shuffle)

        val_dataset = None
        if validation_data:
            val_x, val_y, val_sample_weights = training_utils.unpack_validation_data(
                validation_data)
            dist_utils.validate_inputs(val_x, val_y)
            _, validation_steps = dist_utils.process_batch_and_step_size(
                model._distribution_strategy, val_x, batch_size,
                validation_steps, ModeKeys.TEST)

            val_dataset = model._distribution_standardize_user_data(
                val_x,
                val_y,
                sample_weight=val_sample_weights,
                class_weight=None,
                batch_size=batch_size,
                validation_split=validation_split,
                shuffle=shuffle,
                allow_partial_batch=True)
        elif validation_split:
            raise ValueError('validation_split argument is not supported with '
                             'distribution strategies.')

        if dist_utils.is_tpu_strategy(model._distribution_strategy):
            steps_per_epoch = training_utils.infer_steps_for_dataset(
                dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch')
            if steps_per_epoch is None:
                raise ValueError(
                    'Number of steps could not be inferred from the data, '
                    'please pass the steps_per_epoch argument.')

            if not context.executing_eagerly():
                # Run TPU training in a custom loop in graph mode.
                return experimental_tpu_fit_loop(
                    model,
                    dataset,
                    epochs=epochs,
                    verbose=verbose,
                    callbacks=callbacks,
                    val_dataset=val_dataset,
                    initial_epoch=initial_epoch,
                    steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps,
                    validation_freq=validation_freq)

        return training_arrays.fit_loop(model,
                                        dataset,
                                        batch_size=batch_size,
                                        epochs=epochs,
                                        verbose=verbose,
                                        callbacks=callbacks,
                                        val_inputs=val_dataset,
                                        shuffle=shuffle,
                                        initial_epoch=initial_epoch,
                                        steps_per_epoch=steps_per_epoch,
                                        validation_steps=validation_steps,
                                        validation_freq=validation_freq,
                                        steps_name='steps_per_epoch')
예제 #9
0
    def _model_iteration(self,
                         model,
                         mode,
                         x=None,
                         y=None,
                         batch_size=None,
                         verbose=1,
                         sample_weight=None,
                         steps=None,
                         callbacks=None,
                         **kwargs):

        batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
        strategy = _get_distribution_strategy(model)
        if strategy:
            batch_size, steps = dist_utils.process_batch_and_step_size(
                strategy, x, batch_size, steps, mode)
            dist_utils.validate_callbacks(input_callbacks=callbacks,
                                          optimizer=model.optimizer)
            # Enter tf.distribute.Strategy scope.
            scope = dist_utils.distributed_scope(strategy=strategy,
                                                 learning_phase=0)
            scope.__enter__()

        adapter = _process_inputs(model,
                                  x,
                                  y,
                                  batch_size=batch_size,
                                  sample_weights=sample_weight,
                                  steps=steps,
                                  distribution_strategy=strategy)

        if not steps:
            steps = adapter.get_size()

        # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
        training_context = TrainingContext()

        _update_sample_weight_mode(model, mode, adapter)
        execution_function = _make_execution_function(model, mode)
        data_iterator = _create_dataset_iterator(strategy,
                                                 adapter.get_dataset())

        callbacks = cbks.configure_callbacks(
            callbacks,
            model,
            do_validation=False,
            batch_size=batch_size,
            epochs=1,
            steps_per_epoch=steps,
            samples=None,
            verbose=0,  # Handle ProgBarLogger separately in this loop.
            mode=mode)

        with training_context.on_start(model, callbacks, verbose, mode):
            # TODO(scottzhu): Handle TPUStrategy training loop
            with training_context.on_epoch(0, mode) as epoch_logs:
                model.reset_metrics()
                result = run_one_epoch(model,
                                       data_iterator,
                                       execution_function,
                                       dataset_size=adapter.get_size(),
                                       strategy=strategy,
                                       steps_per_epoch=steps,
                                       mode=mode,
                                       training_context=training_context,
                                       current_epoch=1)
                cbks.make_logs(model, epoch_logs, result, mode)

        if strategy:
            scope.__exit__(None, None, None)

        if len(result) == 1:
            result = result[0]
        return result
예제 #10
0
    def fit(self,
            model,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            **kwargs):
        batch_size = model._validate_or_infer_batch_size(
            batch_size, steps_per_epoch, x)

        strategy = _get_distribution_strategy(model)
        if strategy:
            batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
                strategy, x, batch_size, steps_per_epoch, ModeKeys.TRAIN)
            dist_utils.validate_callbacks(input_callbacks=callbacks,
                                          optimizer=model.optimizer)
            # Enter tf.distribute.Strategy scope.
            scope = dist_utils.distributed_scope(strategy=strategy,
                                                 learning_phase=1)
            scope.__enter__()

        training_data_adapter, validation_adapter = _process_training_inputs(
            model,
            x,
            y,
            batch_size=batch_size,
            sample_weights=sample_weight,
            class_weights=class_weight,
            validation_split=validation_split,
            steps_per_epoch=steps_per_epoch,
            shuffle=shuffle,
            validation_data=validation_data,
            validation_steps=validation_steps,
            distribution_strategy=strategy)

        do_validation = (validation_adapter is not None)

        if not steps_per_epoch:
            steps_per_epoch = training_data_adapter.get_size()

        # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
        training_context = TrainingContext()

        initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
            initial_epoch, ModeKeys.TRAIN)

        _update_sample_weight_mode(model, ModeKeys.TRAIN,
                                   training_data_adapter)
        training_function = _make_execution_function(model, ModeKeys.TRAIN)

        training_data_iter = None
        # Only recreate iterator when the data has a fixed length, which will be
        # fully consumed every epoch, or has a unknown length (dataset, generator)
        # and will be fully consumed (steps_per_epoch is None)
        recreate_training_iterator = (training_data_adapter.get_size()
                                      is not None or steps_per_epoch is None)

        if do_validation:
            if not validation_steps:
                validation_steps = validation_adapter.get_size()
            eval_function = _make_execution_function(model, ModeKeys.TEST)
            eval_data_iter = None
            recreate_eval_iterator = (validation_adapter.get_size() is not None
                                      or validation_steps is None)

        callbacks = cbks.configure_callbacks(
            callbacks,
            model,
            do_validation=do_validation,
            batch_size=batch_size,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            samples=None,
            verbose=0,  # Handle ProgBarLogger separately in this loop.
            mode=ModeKeys.TRAIN)

        with training_context.on_start(model, callbacks, verbose,
                                       ModeKeys.TRAIN):
            # TODO(scottzhu): Handle TPUStrategy training loop
            for epoch in range(initial_epoch, epochs):
                if training_context.callbacks.model.stop_training:
                    break

                # Training
                with training_context.on_epoch(epoch,
                                               ModeKeys.TRAIN) as epoch_logs:
                    model.reset_metrics()
                    if training_data_iter is None or recreate_training_iterator:
                        training_data_iter = _create_dataset_iterator(
                            strategy, training_data_adapter.get_dataset())

                    training_result = run_one_epoch(
                        model,
                        training_data_iter,
                        training_function,
                        dataset_size=training_data_adapter.get_size(),
                        strategy=strategy,
                        steps_per_epoch=steps_per_epoch,
                        mode=ModeKeys.TRAIN,
                        training_context=training_context,
                        current_epoch=epoch)
                    cbks.make_logs(model, epoch_logs, training_result,
                                   ModeKeys.TRAIN)

                    # Evaluation
                    if (do_validation and training_utils.should_run_validation(
                            validation_freq, epoch)
                            and not callbacks.model.stop_training):
                        if eval_data_iter is None or recreate_eval_iterator:
                            eval_data_iter = _create_dataset_iterator(
                                strategy, validation_adapter.get_dataset())
                        eval_context = TrainingContext()
                        with eval_context.on_start(model,
                                                   callbacks,
                                                   verbose=0,
                                                   mode=ModeKeys.TEST):
                            with eval_context.on_epoch(epoch, ModeKeys.TEST):
                                model.reset_metrics()
                                eval_result = run_one_epoch(
                                    model,
                                    eval_data_iter,
                                    eval_function,
                                    dataset_size=validation_adapter.get_size(),
                                    strategy=strategy,
                                    steps_per_epoch=validation_steps,
                                    mode=ModeKeys.TEST,
                                    training_context=eval_context,
                                    current_epoch=epochs)
                                cbks.make_logs(model,
                                               epoch_logs,
                                               eval_result,
                                               ModeKeys.TRAIN,
                                               prefix='val_')

        if strategy:
            scope.__exit__(None, None, None)

        return model.history
예제 #11
0
  def _model_iteration(
      self, model, mode, x=None, y=None, batch_size=None, verbose=1,
      sample_weight=None, steps=None, callbacks=None, **kwargs):

    batch_size = model._validate_or_infer_batch_size(
        batch_size, steps, x)
    strategy = _get_distribution_strategy(model)
    batch_size, steps = dist_utils.process_batch_and_step_size(
        strategy, x, batch_size, steps, mode)
    dist_utils.validate_callbacks(input_callbacks=callbacks,
                                  optimizer=model.optimizer)
    # Enter tf.distribute.Strategy scope.
    with dist_utils.distributed_scope(
        strategy=strategy, learning_phase=0):

      adapter = _process_inputs(
          model,
          x,
          y,
          batch_size=batch_size,
          sample_weights=sample_weight,
          steps=steps,
          distribution_strategy=strategy)

      if not steps:
        steps = adapter.get_size()

      # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
      training_context = TrainingContext()

      dataset = adapter.get_dataset()
      # Raise an error if `steps` isn't specified but the dataset
      # is infinite.
      # TODO(scottzhu): This check should probably happen in the adapter
      training_utils.infer_steps_for_dataset(
          dataset, steps, steps_name='steps', epochs=0)
      dataset = strategy.experimental_distribute_dataset(dataset)

      _update_sample_weight_mode(model, mode, dataset)
      execution_function = training_v2_utils._get_or_make_execution_function(
          model, mode)

      data_iterator = iter(dataset)

      callbacks = cbks.configure_callbacks(
          callbacks,
          model,
          do_validation=False,
          batch_size=batch_size,
          epochs=1,
          steps_per_epoch=steps,
          samples=None,
          verbose=0,  # Handle ProgBarLogger separately in this loop.
          mode=mode)

      with training_context.on_start(model, callbacks, verbose, mode):
        # TODO(scottzhu): Handle TPUStrategy training loop
        with training_context.on_epoch(0, mode) as epoch_logs:
          model.reset_metrics()
          result = run_one_epoch(
              model,
              data_iterator,
              execution_function,
              dataset_size=adapter.get_size(),
              strategy=strategy,
              steps_per_epoch=steps,
              mode=mode,
              training_context=training_context,
              total_epochs=1)
          cbks.make_logs(model, epoch_logs, result, mode)

    if len(result) == 1:
      result = result[0]
    return result
예제 #12
0
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    # Until support for partial batch is implemented across all
    # functions and distribution strategy, we pass `mode` to selectively
    # relax the costraint to consume all the training samples.
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, mode=ModeKeys.TRAIN))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      validation_split=validation_split,
      shuffle=shuffle,
      repeat=True)

  val_dataset = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size)
    val_dataset = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        validation_split=validation_split,
        shuffle=shuffle)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        dataset,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_dataset=val_dataset,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq)
  else:
    return training_arrays.fit_loop(
        model,
        dataset,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_dataset,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq,
        steps_name='steps_per_epoch')
예제 #13
0
    def fit(self,
            model,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            **kwargs):
        """Fit loop for Distribution Strategies."""
        dist_utils.validate_callbacks(input_callbacks=callbacks,
                                      optimizer=model.optimizer)
        dist_utils.validate_inputs(x, y)

        batch_size, steps_per_epoch = self._process_batch_and_step_size(
            model, x, batch_size, steps_per_epoch, ModeKeys.TRAIN)
        batch_size = model._validate_or_infer_batch_size(
            batch_size, steps_per_epoch, x)
        dataset = model._distribution_standardize_user_data(
            x,
            y,
            sample_weight=sample_weight,
            class_weight=class_weight,
            batch_size=batch_size,
            validation_split=validation_split,
            shuffle=shuffle,
            epochs=epochs)
        if not dist_utils.is_distributing_by_cloning(model):
            with model._distribution_strategy.scope():
                (dataset, _, _) = model._standardize_user_data(
                    dataset,
                    sample_weight=sample_weight,
                    class_weight=class_weight,
                    batch_size=batch_size,
                    validation_split=validation_split,
                    shuffle=shuffle)

        val_dataset = None
        if validation_data:
            val_x, val_y, val_sample_weights = model._unpack_validation_data(
                validation_data)
            dist_utils.validate_inputs(val_x, val_y)
            _, validation_steps = self._process_batch_and_step_size(
                model, val_x, batch_size, validation_steps, ModeKeys.TEST)

            val_dataset = model._distribution_standardize_user_data(
                val_x,
                val_y,
                sample_weight=val_sample_weights,
                class_weight=None,
                batch_size=batch_size,
                validation_split=validation_split,
                shuffle=shuffle,
                allow_partial_batch=True)
        elif validation_split:
            raise ValueError('validation_split argument is not supported with '
                             'distribution strategies.')

        if dist_utils.is_tpu_strategy(model._distribution_strategy):
            return experimental_tpu_fit_loop(model,
                                             dataset,
                                             epochs=epochs,
                                             verbose=verbose,
                                             callbacks=callbacks,
                                             val_dataset=val_dataset,
                                             initial_epoch=initial_epoch,
                                             steps_per_epoch=steps_per_epoch,
                                             validation_steps=validation_steps,
                                             validation_freq=validation_freq)
        else:
            return training_arrays.fit_loop(model,
                                            dataset,
                                            batch_size=batch_size,
                                            epochs=epochs,
                                            verbose=verbose,
                                            callbacks=callbacks,
                                            val_inputs=val_dataset,
                                            shuffle=shuffle,
                                            initial_epoch=initial_epoch,
                                            steps_per_epoch=steps_per_epoch,
                                            validation_steps=validation_steps,
                                            validation_freq=validation_freq,
                                            steps_name='steps_per_epoch')