def _process_inputs(model,
                    x,
                    y,
                    batch_size=None,
                    sample_weights=None,
                    class_weights=None,
                    shuffle=False,
                    steps=None,
                    distribution_strategy=None):
    """Process the inputs for fit/eval/predict()."""
    adapter_cls = data_adapter.select_data_adapter(x, y)
    if adapter_cls in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
        x, y, sample_weights = model._standardize_user_data(
            x,
            y,
            sample_weight=sample_weights,
            class_weight=class_weights,
            batch_size=batch_size,
            check_steps=True,
            steps=steps)
    adapter = adapter_cls(x,
                          y,
                          batch_size=batch_size,
                          steps=steps,
                          sample_weights=sample_weights,
                          shuffle=shuffle,
                          distribution_strategy=distribution_strategy)
    # As a fallback for data types that do not work with
    # _standardize_user_data, use _prepare_model_with_inputs instead.
    if adapter_cls not in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
        training_v2_utils._prepare_model_with_inputs(model,
                                                     adapter.get_dataset())
    return adapter
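
For context: the snippets on this page reference module-level constants that are not shown. In the TF 2.0-era training_v2.py source they are defined roughly as follows; the exact adapter membership varies between releases, so treat this as an approximation rather than a canonical definition.

from tensorflow.python.keras.engine import data_adapter

# Adapters whose input types are compatible with model._standardize_user_data.
_ADAPTER_FOR_STANDARDIZE_USER_DATA = (
    data_adapter.TensorLikeDataAdapter,
    data_adapter.CompositeTensorDataAdapter)

# Adapters whose input types can be sliced for validation_split.
_ADAPTER_FOR_VALIDATION_SPLIT = (data_adapter.TensorLikeDataAdapter,)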
Example #2
def _process_inputs(model,
                    x,
                    y,
                    batch_size=None,
                    sample_weights=None,
                    class_weights=None,
                    shuffle=False,
                    steps=None,
                    distribution_strategy=None):
    """Process the inputs for fit/eval/predict()."""
    adapter_cls = data_adapter.select_data_adapter(x, y)
    if adapter_cls in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
        x, y, sample_weights = model._standardize_user_data(
            x,
            y,
            sample_weight=sample_weights,
            class_weight=class_weights,
            batch_size=batch_size,
            check_steps=True,
            steps=steps)
    # TODO(scottzhu): Generators and keras.utils.Sequence do not work with
    # model._standardize_user_data() so far. However, that method is important:
    # it handles on-the-fly model building, tensor alignment for dict inputs,
    # etc. We should still call _standardize_user_data() with data peeked from
    # the generator or sequence, and let the model compile.
    return adapter_cls(x,
                       y,
                       batch_size=batch_size,
                       sample_weights=sample_weights,
                       shuffle=shuffle,
                       distribution_strategy=distribution_strategy)
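
The TODO above suggests peeking a batch from the generator so that the model can still be built and compiled. A minimal sketch of that peek idea in plain Python; the Keras integration itself is exactly what the TODO leaves open.

import itertools

import numpy as np

def peek(generator):
    # Pull the first element, then chain it back so no batch is lost.
    first = next(generator)
    return first, itertools.chain([first], generator)

def batches():
    while True:
        yield np.zeros((32, 4)), np.zeros((32, 1))

(first_x, first_y), gen = peek(batches())
# first_x/first_y now expose the structure needed for model building, while
# gen can be handed to the data adapter unchanged.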
Example #3
def _process_inputs(model,
                    mode,
                    x,
                    y,
                    batch_size=None,
                    epochs=1,
                    sample_weights=None,
                    class_weights=None,
                    shuffle=False,
                    steps=None,
                    distribution_strategy=None,
                    max_queue_size=10,
                    workers=1,
                    use_multiprocessing=False):
  """Process the inputs for fit/eval/predict()."""
  adapter_cls = data_adapter.select_data_adapter(x, y)
  if adapter_cls in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
    x, y, sample_weights = model._standardize_user_data(
        x,
        y,
        sample_weight=sample_weights,
        class_weight=class_weights,
        batch_size=batch_size,
        check_steps=False,
        steps=steps)

  if mode == ModeKeys.PREDICT:
    sample_weight_modes = None
  else:
    sample_weight_modes = [
        e.sample_weight_mode for e in model._training_endpoints
    ]

  adapter = adapter_cls(
      x,
      y,
      batch_size=batch_size,
      epochs=epochs,
      steps=steps,
      sample_weights=sample_weights,
      sample_weight_modes=sample_weight_modes,
      shuffle=shuffle,
      distribution_strategy=distribution_strategy,
      max_queue_size=max_queue_size,
      workers=workers,
      use_multiprocessing=use_multiprocessing)
  # As a fallback for data types that do not work with
  # _standardize_user_data, use _prepare_model_with_inputs instead.
  if adapter_cls not in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
    training_v2_utils._prepare_model_with_inputs(model, adapter.get_dataset())
  return adapter
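
The per-endpoint sample_weight_mode collected above is set at compile time. A small sketch against the public TF 2.0/2.1-era API (compile() lost the sample_weight_mode argument in later releases), showing where that value originates:

import tensorflow as tf

inputs = tf.keras.Input(shape=(10, 4))
outputs = tf.keras.layers.LSTM(2, return_sequences=True)(inputs)
model = tf.keras.Model(inputs, outputs)
# 'temporal' requests one weight per timestep; this is the per-output value
# that the list comprehension over model._training_endpoints picks up.
model.compile(optimizer='sgd', loss='mse', sample_weight_mode='temporal')
print(model.sample_weight_mode)  # 'temporal'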
Example #4
def should_fallback_to_v1_for_callback(inputs, callbacks):
    """Whether to fallback to v1 training loop because of callbacks.

  This is only a temporary solution until the v2 training loop is fixed for
  using batch based callbacks.

  Args:
    inputs: the inputs to the model. Certain input type might not handle certain
      callbacks well if it need batch based counting.
    callbacks: list of callbacks configured for the fit/eval/predict.

  Returns:
    boolean, whether it should fallbacks to use v1 training loop.
  """
    try:
        adapter_cls = data_adapter.select_data_adapter(inputs, None)
        if adapter_cls not in (data_adapter.GeneratorDataAdapter,
                               data_adapter.DatasetAdapter):
            # For any input data whose overall size we know (e.g. numpy arrays,
            # lists of lists, etc.), we don't need to fall back, since the v2
            # loop can determine the batch size.
            return False
    except ValueError:
        # If we can't find a suitable adapter, fall back to v1.
        return True

    callbacks = callbacks or []
    for c in callbacks:
        if isinstance(c, cbks.ModelCheckpoint) and isinstance(
                c.save_freq, int):
            return True
        elif (isinstance(c, cbks.TensorBoard)
              and isinstance(c.update_freq, int) and
              c.update_freq > 1):  # This is an implementation detail of TB.
            return True
    return False
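
For illustration, two callback configurations that would make this function return True for a generator input, since both need batch-based counting; the filepath and frequencies here are hypothetical values:

import numpy as np
import tensorflow as tf

def gen():
    while True:
        yield np.zeros((32, 4)), np.zeros((32, 1))

# An integer save_freq means "save every N samples" in the TF 2.0-era API,
# which requires batch counting rather than epoch boundaries.
ckpt = tf.keras.callbacks.ModelCheckpoint('/tmp/weights.h5', save_freq=100)
# An integer update_freq > 1 is exactly the TensorBoard case checked above.
tb = tf.keras.callbacks.TensorBoard(update_freq=500)
# should_fallback_to_v1_for_callback(gen(), [ckpt, tb])  # -> True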
Example #5
def _process_training_inputs(model,
                             x,
                             y,
                             batch_size=None,
                             epochs=1,
                             sample_weights=None,
                             class_weights=None,
                             steps_per_epoch=None,
                             validation_split=0.,
                             validation_data=None,
                             validation_steps=None,
                             shuffle=True,
                             distribution_strategy=None,
                             max_queue_size=10,
                             workers=1,
                             use_multiprocessing=False):
  """Process the data input for fit() with respect to validation_split."""
  if validation_split and 0. < validation_split < 1. and validation_data:
    raise ValueError('validation_data and validation_split cannot be used '
                     'at the same time.')

  adapter_cls = data_adapter.select_data_adapter(x, y)

  # Handle validation_split: we want to split the data and get the training
  # section before handing it to the data adapter.
  if validation_split and 0. < validation_split < 1.:
    if adapter_cls not in _ADAPTER_FOR_VALIDATION_SPLIT:
      raise ValueError(
          '`validation_split` argument is not supported when '
          'data adapter is {}. Received: x={}, validation_split={}'.format(
              adapter_cls, x, validation_split))
    # Retrieve the training section from x and y, and then construct a dataset
    # from it.
    x, y, sample_weights = model._standardize_user_data(
        x,
        y,
        sample_weight=sample_weights,
        class_weight=class_weights,
        batch_size=batch_size,
        check_steps=False,
        steps=steps_per_epoch)
    (x, y, sample_weights,
     val_x, val_y,
     val_sample_weights) = training_utils.split_training_and_validation_data(
         x, y, sample_weights, validation_split)

    sample_weight_modes = [
        e.sample_weight_mode for e in model._training_endpoints
    ]
    train_adapter = adapter_cls(
        x,
        y,
        batch_size=batch_size,
        epochs=epochs,
        sample_weights=sample_weights,
        sample_weight_modes=sample_weight_modes,
        shuffle=shuffle,
        distribution_strategy=distribution_strategy)

    val_adapter = adapter_cls(
        val_x,
        val_y,
        sample_weights=val_sample_weights,
        sample_weight_modes=sample_weight_modes,
        batch_size=batch_size,
        distribution_strategy=distribution_strategy)
  else:
    train_adapter = _process_inputs(
        model,
        ModeKeys.TRAIN,
        x,
        y,
        sample_weights=sample_weights,
        batch_size=batch_size,
        epochs=epochs,
        class_weights=class_weights,
        shuffle=shuffle,
        steps=steps_per_epoch,
        distribution_strategy=distribution_strategy,
        max_queue_size=max_queue_size,
        workers=workers,
        use_multiprocessing=use_multiprocessing)
    val_adapter = None
    if validation_data:
      (val_x, val_y,
       val_sample_weights) = training_utils.unpack_validation_data(
           validation_data, raise_if_ambiguous=False)
      # For eval data, use a representative batch size from the training data
      # if batch_size was unknown. This is useful for generator/sequence
      # training input combined with numpy validation input.
      if not batch_size:
        batch_size = train_adapter.representative_batch_size()
      val_adapter = _process_inputs(
          model,
          ModeKeys.TEST,
          val_x,
          val_y,
          sample_weights=val_sample_weights,
          batch_size=batch_size,
          class_weights=class_weights,
          steps=validation_steps,
          distribution_strategy=distribution_strategy)
    elif validation_steps:
      raise ValueError('`validation_steps` should not be specified if '
                       '`validation_data` is None.')
  return train_adapter, val_adapter
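
A minimal sketch of what training_utils.split_training_and_validation_data does with the already-standardized arrays, assuming the standard Keras behavior: the last fraction becomes the validation set and no shuffling happens before the split.

import numpy as np

def split_for_validation(x, y, validation_split):
    split_at = int(x.shape[0] * (1. - validation_split))
    return (x[:split_at], y[:split_at]), (x[split_at:], y[split_at:])

x = np.arange(10).reshape(10, 1)
y = np.arange(10)
(train_x, train_y), (val_x, val_y) = split_for_validation(x, y, 0.2)
print(train_x.shape, val_x.shape)  # (8, 1) (2, 1)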
Example #6
def _process_inputs(model,
                    mode,
                    x,
                    y,
                    batch_size=None,
                    epochs=1,
                    sample_weights=None,
                    class_weights=None,
                    shuffle=False,
                    steps=None,
                    distribution_strategy=None,
                    max_queue_size=10,
                    workers=1,
                    use_multiprocessing=False):
  """Process the inputs for fit/eval/predict()."""
  adapter_cls = data_adapter.select_data_adapter(x, y)
  standardize = functools.partial(
      model._standardize_user_data,
      class_weight=class_weights,
      batch_size=batch_size,
      check_steps=False,
      steps=steps)
  if adapter_cls in _ADAPTER_FOR_STANDARDIZE_USER_DATA:
    standardize_function = None
    x, y, sample_weights = standardize(
        x, y, sample_weight=sample_weights)
  elif adapter_cls is data_adapter.ListsOfScalarsDataAdapter:
    standardize_function = standardize
  else:
    def standardize_function(dataset):
      """Data adapters can standardize when appropriate."""
      # First we call _standardize_user_data with the dataset since that has
      # enough structure to build the model.
      if not model._is_compiled:
        # We don't actually care about the values of these attributes, but
        # they are only created in compile() and are accessed in
        # _standardize_user_data().
        model._training_endpoints = getattr(model, '_training_endpoints', [])
        model.sample_weight_mode = getattr(model, 'sample_weight_mode', None)

      standardize(dataset, extract_tensors_from_dataset=False)

      # Then we map using only the tensor standardization portion.
      def map_fn(x, y=None, sample_weights=None):
        """Tensor manipulation portion of standardization for Dataset.map."""
        standardized = model._standardize_tensors(
            x, y, sample_weights,
            run_eagerly=False,
            dict_inputs=isinstance(x, dict),
            is_dataset=False,
            class_weight=class_weights,
            batch_size=None)
        x, y, sample_weights = nest._list_to_tuple(standardized)
        if y is None:
          return (x,)
        if sample_weights is None:
          return x, y
        return x, y, sample_weights
      return dataset.map(map_fn, num_parallel_calls=dataset_ops.AUTOTUNE)

  if mode == ModeKeys.PREDICT:
    sample_weight_modes = None
  else:
    sample_weight_modes = [
        e.sample_weight_mode for e in model._training_endpoints
    ] or model.sample_weight_mode

  adapter = adapter_cls(
      x,
      y,
      standardize_function=standardize_function,
      batch_size=batch_size,
      epochs=epochs,
      steps=steps,
      sample_weights=sample_weights,
      sample_weight_modes=sample_weight_modes,
      shuffle=shuffle,
      distribution_strategy=distribution_strategy,
      max_queue_size=max_queue_size,
      workers=workers,
      use_multiprocessing=use_multiprocessing)

  return adapter
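
The standardize_function above ultimately reduces to a Dataset.map with AUTOTUNE parallelism. A self-contained sketch of that pattern, with a trivial dtype cast standing in for model._standardize_tensors:

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    (tf.zeros((8, 4), dtype=tf.float64), tf.zeros((8, 1), dtype=tf.float64)))

def map_fn(x, y):
    # Stand-in "standardization": cast inputs to the dtype the model expects.
    return tf.cast(x, tf.float32), tf.cast(y, tf.float32)

ds = ds.map(map_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
for x, y in ds.take(1):
    print(x.dtype, y.dtype)  # <dtype: 'float32'> <dtype: 'float32'>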