Code example #1
def convert_to_generator_like(data,
                              batch_size=None,
                              steps_per_epoch=None,
                              epochs=1,
                              shuffle=False):
  """Make a generator out of NumPy or EagerTensor inputs.

  Arguments:
    data: Either a generator or `keras.utils.data_utils.Sequence` object or
      `Dataset` or `EagerIterator` or a {1,2,3}-tuple of NumPy arrays or
      EagerTensors. If a tuple, the elements represent `(x, y, sample_weights)`
      and may be `None` or `[None]`.
    batch_size: Used when creating a generator out of tuples of NumPy arrays or
      EagerTensors.
    steps_per_epoch: Steps of the generator to run each epoch.
    epochs: Total number of epochs to run.
    shuffle: Whether the data should be shuffled.

  Returns:
    - Generator or `keras.utils.data_utils.Sequence` or EagerIterator.

  Raises:
    - ValueError: If `batch_size` is not provided for NumPy or EagerTensor
      inputs.
  """
  if isinstance(data, tuple):
    # Scrub `Nones` that might have been passed for `targets`, `sample_weights`.
    data = tuple(
        ele for ele in data if not all(e is None for e in nest.flatten(ele)))
    if len(data) == 1:
      data = data[0]

  if data_utils.is_generator_or_sequence(data) or isinstance(
      data, iterator_ops.EagerIterator):
    if isinstance(data, data_utils.Sequence):
      steps_per_epoch = len(data)
    return data, steps_per_epoch
  if isinstance(data, dataset_ops.DatasetV2):
    return dataset_ops.make_one_shot_iterator(data), steps_per_epoch

  # Create a generator from NumPy or EagerTensor inputs.
  num_samples = int(nest.flatten(data)[0].shape[0])
  if batch_size is None:
    raise ValueError('You must specify `batch_size`')
  steps_per_epoch = int(math.ceil(num_samples / batch_size))

  def _gen(data):
    """Makes a generator out of a structure of NumPy/EagerTensors."""
    index_array = np.arange(num_samples)
    for _ in range(epochs):
      if shuffle:
        np.random.shuffle(index_array)
      batches = generic_utils.make_batches(num_samples, batch_size)
      for (batch_start, batch_end) in batches:
        batch_ids = index_array[batch_start:batch_end]
        flat_batch_data = training_utils.slice_arrays(
            nest.flatten(data), batch_ids, contiguous=(not shuffle))
        yield nest.pack_sequence_as(data, flat_batch_data)

  return _gen(data), steps_per_epoch
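
The core of the NumPy path above is the `_gen` closure: shuffle an index array once per epoch, then slice every flattened array with the same batch indices. Below is a minimal self-contained sketch of that batching logic, using plain NumPy in place of the TensorFlow-internal helpers (`nest`, `training_utils.slice_arrays`); the name `numpy_batch_generator` is illustrative, not part of the TensorFlow API:

import math

import numpy as np


def numpy_batch_generator(x, y, batch_size, epochs=1, shuffle=False):
  """Yields `(x_batch, y_batch)` tuples, mirroring `_gen` above."""
  num_samples = x.shape[0]
  index_array = np.arange(num_samples)
  for _ in range(epochs):
    if shuffle:
      index_array = np.random.permutation(num_samples)
    for start in range(0, num_samples, batch_size):
      batch_ids = index_array[start:start + batch_size]
      yield x[batch_ids], y[batch_ids]


x = np.random.rand(10, 4)
y = np.random.rand(10, 1)
steps_per_epoch = int(math.ceil(10 / 2))  # 5, computed as in the code above
for x_batch, y_batch in numpy_batch_generator(x, y, batch_size=2, shuffle=True):
  print(x_batch.shape, y_batch.shape)  # (2, 4) (2, 1)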
Code example #2
def _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers,
                        steps_per_epoch, validation_data, validation_steps,
                        mode, kwargs):
  """Raises errors if arguments are invalid.

  Arguments:
    is_sequence: Boolean, whether data is a `keras.utils.data_utils.Sequence`
      instance.
    is_dataset: Boolean, whether data is a dataset instance.
    use_multiprocessing: Boolean. If `True`, use process-based threading. If
      unspecified, `use_multiprocessing` will default to `False`. Note that
      because this implementation relies on multiprocessing, you should not pass
      non-picklable arguments to the generator as they can't be passed easily to
      child processes.
    workers: Integer. Maximum number of processes to spin up when using
      process-based threading. If unspecified, `workers` will default to 1. If
      0, will execute the generator on the main thread.
    steps_per_epoch: Total number of steps (batches of samples) before declaring
      one epoch finished and starting the next epoch. Ignored with the default
      value of `None`.
    validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or `(x,
      y)` or `(x, y, sample_weights)`) or a generator or
      `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset.
    validation_steps: Total number of steps (batches of samples) before
      declaring validation finished.
    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
    kwargs: Additional arguments for backwards compatibility.

  Raises:
    ValueError: If `steps_per_epoch` or `validation_steps` are not passed
      for data types that require them, or if unrecognized keyword
      arguments are passed.
  """
  if not is_sequence and use_multiprocessing and workers > 1:
    logging.warning(
        UserWarning('Using a generator with `use_multiprocessing=True`'
                    ' and multiple workers may duplicate your data.'
                    ' Please consider using the `keras.utils.Sequence`'
                    ' class.'))

  if steps_per_epoch is None and not is_dataset:
    arg_name = 'steps_per_epoch' if mode == ModeKeys.TRAIN else 'steps'
    raise ValueError('Please specify the number of steps via the '
                     '`{}` argument.'.format(arg_name))

  val_gen = (
      data_utils.is_generator_or_sequence(validation_data) or
      isinstance(validation_data, iterator_ops.IteratorV2))
  if (val_gen and not isinstance(validation_data, data_utils.Sequence) and
      not validation_steps):
    raise ValueError('Please specify the `validation_steps` argument.')

  if any(k != 'steps' for k in kwargs):
    raise ValueError('Invalid arguments passed: {}'.format(
        [k for k in kwargs if k != 'steps']))
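
The warning above recommends `keras.utils.Sequence` over a plain generator for multiprocessing, because a `Sequence` is indexable and each worker can fetch a distinct batch by index instead of pulling from one shared stream. A minimal sketch of such a subclass, using the public `tf.keras.utils.Sequence` API; `NumpySequence` is an illustrative name:

import math

import numpy as np
import tensorflow as tf


class NumpySequence(tf.keras.utils.Sequence):
  """Maps each index to one deterministic batch, so workers never overlap."""

  def __init__(self, x, y, batch_size):
    self.x, self.y, self.batch_size = x, y, batch_size

  def __len__(self):
    # Batches per epoch; callers can use this in place of `steps_per_epoch`.
    return int(math.ceil(len(self.x) / self.batch_size))

  def __getitem__(self, idx):
    start = idx * self.batch_size
    return (self.x[start:start + self.batch_size],
            self.y[start:start + self.batch_size])

With a `Sequence`, `is_sequence` is `True`, so the duplication warning above is skipped; in `fit_generator` below, `steps_per_epoch` also defaults to `len(generator)`.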
Code example #3
def fit_generator(model,
                  generator,
                  steps_per_epoch=None,
                  epochs=1,
                  verbose=1,
                  callbacks=None,
                  validation_data=None,
                  validation_steps=None,
                  class_weight=None,
                  max_queue_size=10,
                  workers=1,
                  use_multiprocessing=False,
                  shuffle=True,
                  initial_epoch=0):
  """See docstring for `Model.fit_generator`."""
  epoch = initial_epoch

  do_validation = bool(validation_data)
  if not context.executing_eagerly():
    model._make_train_function()
    if do_validation:
      model._make_test_function()

  is_sequence = isinstance(generator, data_utils.Sequence)
  if not is_sequence and use_multiprocessing and workers > 1:
    logging.warning(
        UserWarning('Using a generator with `use_multiprocessing=True`'
                    ' and multiple workers may duplicate your data.'
                    ' Please consider using the `keras.utils.Sequence`'
                    ' class.'))
  if steps_per_epoch is None:
    if is_sequence:
      steps_per_epoch = len(generator)
    else:
      raise ValueError('Please specify the `steps_per_epoch` argument.')

  if (isinstance(validation_data, dataset_ops.Dataset) and
      context.executing_eagerly()):
    validation_data = validation_data.make_one_shot_iterator()
  val_gen = (data_utils.is_generator_or_sequence(validation_data) or
             isinstance(validation_data, iterator_ops.EagerIterator))
  if (val_gen and not isinstance(validation_data, data_utils.Sequence) and
      not validation_steps):
    raise ValueError('Please specify the `validation_steps` argument.')

  enqueuer = None
  val_enqueuer = None

  try:
    val_x, val_y, val_sample_weights = validation_data, None, None
    if do_validation and not val_gen:
      # Prepare data for validation
      if len(validation_data) == 2:
        val_x, val_y = validation_data  # pylint: disable=unpacking-non-sequence
        val_sample_weights = None
      elif len(validation_data) == 3:
        val_x, val_y, val_sample_weights = validation_data  # pylint: disable=unpacking-non-sequence
      else:
        raise ValueError(
            '`validation_data` should be a tuple '
            '`(val_x, val_y, val_sample_weight)` '
            'or `(val_x, val_y)`. Found: ' + str(validation_data))
      val_x, val_y, val_sample_weights = model._standardize_user_data(
          val_x, val_y, val_sample_weights)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        val_inputs=val_x,
        val_targets=val_y,
        val_sample_weights=val_sample_weights,
        epochs=epochs,
        validation_steps=validation_steps,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose)

    if workers > 0:
      if is_sequence:
        enqueuer = data_utils.OrderedEnqueuer(
            generator,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle)
      else:
        enqueuer = data_utils.GeneratorEnqueuer(
            generator,
            use_multiprocessing=use_multiprocessing)
      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
      output_generator = enqueuer.get()
    else:
      if is_sequence:
        output_generator = data_utils.iter_sequence_infinite(generator)
      else:
        output_generator = generator

    callbacks.on_train_begin()
    # Construct epoch logs.
    epoch_logs = {}
    while epoch < epochs:
      for m in model.metrics:
        m.reset_states()
      callbacks.on_epoch_begin(epoch)
      steps_done = 0
      batch_index = 0
      while steps_done < steps_per_epoch:
        generator_output = next(output_generator)
        if not hasattr(generator_output, '__len__'):
          raise ValueError('Output of generator should be '
                           'a tuple `(x, y, sample_weight)` '
                           'or `(x, y)`. Found: ' + str(generator_output))

        if len(generator_output) == 2:
          x, y = generator_output
          sample_weight = None
        elif len(generator_output) == 3:
          x, y, sample_weight = generator_output
        else:
          raise ValueError('Output of generator should be '
                           'a tuple `(x, y, sample_weight)` '
                           'or `(x, y)`. Found: ' + str(generator_output))
        # build batch logs
        batch_logs = {}
        if isinstance(x, list):
          batch_size = x[0].shape[0]
        elif isinstance(x, dict):
          batch_size = list(x.values())[0].shape[0]
        else:
          batch_size = x.shape[0]
        batch_logs['batch'] = int(batch_index)
        batch_logs['size'] = int(batch_size)
        callbacks.on_batch_begin(batch_index, batch_logs)

        outs = model.train_on_batch(
            x, y, sample_weight=sample_weight, class_weight=class_weight)

        if not isinstance(outs, list):
          outs = [outs]
        for l, o in zip(model.metrics_names, outs):
          batch_logs[l] = o

        callbacks.on_batch_end(batch_index, batch_logs)

        batch_index += 1
        steps_done += 1

        # Epoch finished.
        if steps_done >= steps_per_epoch and do_validation:
          if val_gen:
            val_outs = evaluate_generator(
                model,
                validation_data,
                validation_steps,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                max_queue_size=max_queue_size)
          else:
            # No need for try/except because
            # data has already been validated.
            val_outs = model.evaluate(
                val_x,
                val_y,
                batch_size=batch_size,
                sample_weight=val_sample_weights,
                verbose=0)
          if not isinstance(val_outs, list):
            val_outs = [val_outs]
          # Same labels assumed.
          for l, o in zip(model.metrics_names, val_outs):
            epoch_logs['val_' + l] = o

        if callbacks.model.stop_training:
          break

      callbacks.on_epoch_end(epoch, epoch_logs)
      epoch += 1
      if callbacks.model.stop_training:
        break

  except (errors.OutOfRangeError, StopIteration):
    logging.warning(
        'Your dataset iterator ran out of data; interrupting training. '
        'Make sure that your dataset can generate at least `steps_per_epoch` '
        'batches (in this case, %d batches). You may need to use the '
        'repeat() function when building your dataset.', steps_per_epoch)

  finally:
    try:
      if enqueuer is not None:
        enqueuer.stop()
    finally:
      if val_enqueuer is not None:
        val_enqueuer.stop()

  callbacks.on_train_end()
  return model.history
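
For completeness, here is a minimal end-to-end sketch that drives the loop above through the public `Model.fit_generator` method, assuming a TF 1.x / early TF 2.x environment (the method was later deprecated in favor of `Model.fit`); the model and data are illustrative:

import numpy as np
import tensorflow as tf


def batch_generator(x, y, batch_size):
  """Loops forever; `fit_generator` stops after `steps_per_epoch` batches."""
  while True:
    for start in range(0, len(x), batch_size):
      yield x[start:start + batch_size], y[start:start + batch_size]


x = np.random.rand(100, 8).astype('float32')
y = np.random.rand(100, 1).astype('float32')

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile(optimizer='sgd', loss='mse')

# A plain generator requires `steps_per_epoch` (see the check above);
# a `Sequence` would not, since its length is known.
model.fit_generator(batch_generator(x, y, batch_size=10),
                    steps_per_epoch=10, epochs=2)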