import math

import numpy as np

# Imports below are assumed from the surrounding TensorFlow module
# (tensorflow/python/keras/engine/training_generator.py); paths reflect the
# TF 1.x source tree this code comes from.
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.ops import iterator_ops
from tensorflow.python.eager import context
from tensorflow.python.framework import errors
from tensorflow.python.keras import callbacks as cbks
from tensorflow.python.keras.engine import training_utils
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import generic_utils
from tensorflow.python.keras.utils.mode_keys import ModeKeys
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest


def convert_to_generator_like(data,
                              batch_size=None,
                              steps_per_epoch=None,
                              epochs=1,
                              shuffle=False):
  """Make a generator out of NumPy or EagerTensor inputs.

  Arguments:
    data: Either a generator or `keras.utils.data_utils.Sequence` object or
      `Dataset` or `EagerIterator` or a {1,2,3}-tuple of NumPy arrays or
      EagerTensors. If a tuple, the elements represent `(x, y, sample_weights)`
      and may be `None` or `[None]`.
    batch_size: Used when creating a generator out of tuples of NumPy arrays or
      EagerTensors.
    steps_per_epoch: Steps of the generator to run each epoch.
    epochs: Total number of epochs to run.
    shuffle: Whether the data should be shuffled.

  Returns:
    - Generator or `keras.utils.data_utils.Sequence` or EagerIterator.

  Raises:
    - ValueError: If `batch_size` is not provided for NumPy or EagerTensor
      inputs.
  """
  if isinstance(data, tuple):
    # Scrub `Nones` that might have been passed for `targets`,
    # `sample_weights`.
    data = tuple(
        ele for ele in data if not all(e is None for e in nest.flatten(ele)))
    if len(data) == 1:
      data = data[0]

  # Generators, Sequences, and eager iterators pass through unchanged; a
  # Sequence additionally knows its own length.
  if data_utils.is_generator_or_sequence(data) or isinstance(
      data, iterator_ops.EagerIterator):
    if isinstance(data, data_utils.Sequence):
      steps_per_epoch = len(data)
    return data, steps_per_epoch
  if isinstance(data, dataset_ops.DatasetV2):
    return dataset_ops.make_one_shot_iterator(data), steps_per_epoch

  # Create generator from NumPy or EagerTensor input.
  num_samples = int(nest.flatten(data)[0].shape[0])
  if batch_size is None:
    raise ValueError('You must specify `batch_size`')
  steps_per_epoch = int(math.ceil(num_samples / batch_size))

  def _gen(data):
    """Makes a generator out of a structure of NumPy/EagerTensors."""
    index_array = np.arange(num_samples)
    for _ in range(epochs):
      if shuffle:
        np.random.shuffle(index_array)
      batches = generic_utils.make_batches(num_samples, batch_size)
      for (batch_start, batch_end) in batches:
        batch_ids = index_array[batch_start:batch_end]
        flat_batch_data = training_utils.slice_arrays(
            nest.flatten(data), batch_ids, contiguous=(not shuffle))
        yield nest.pack_sequence_as(data, flat_batch_data)

  return _gen(data), steps_per_epoch
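
# Example (a minimal sketch, not part of the original module; the array names
# `x`/`y` are illustrative): wrapping in-memory NumPy arrays yields an
# epoch-aware generator plus the derived `steps_per_epoch`.
#
#   x = np.random.random((10, 4))
#   y = np.random.random((10, 1))
#   gen, steps = convert_to_generator_like((x, y), batch_size=2, epochs=1)
#   batch_x, batch_y = next(gen)  # batch_x.shape == (2, 4); steps == 5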
def _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers,
                        steps_per_epoch, validation_data, validation_steps,
                        mode, kwargs):
  """Raises errors if arguments are invalid.

  Arguments:
    is_sequence: Boolean, whether data is a `keras.utils.data_utils.Sequence`
      instance.
    is_dataset: Boolean, whether data is a dataset instance.
    use_multiprocessing: Boolean. If `True`, use process-based threading. If
      unspecified, `use_multiprocessing` will default to `False`. Note that
      because this implementation relies on multiprocessing, you should not
      pass non-picklable arguments to the generator as they can't be passed
      easily to children processes.
    workers: Integer. Maximum number of processes to spin up when using
      process-based threading. If unspecified, `workers` will default to 1. If
      0, will execute the generator on the main thread.
    steps_per_epoch: Total number of steps (batches of samples) before
      declaring one epoch finished and starting the next epoch. Ignored with
      the default value of `None`.
    validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or
      `(x, y)` or `(x, y, sample_weights)`) or a generator or
      `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset.
    validation_steps: Total number of steps (batches of samples) before
      declaring validation finished.
    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
    kwargs: Additional arguments for backwards compatibility.

  Raises:
    ValueError: If `steps_per_epoch` or `validation_steps` are not passed for
      data types that require them, or if unrecognized keyword arguments are
      passed.
  """
  if not is_sequence and use_multiprocessing and workers > 1:
    logging.warning(
        UserWarning('Using a generator with `use_multiprocessing=True`'
                    ' and multiple workers may duplicate your data.'
                    ' Please consider using the `keras.utils.Sequence`'
                    ' class.'))

  if steps_per_epoch is None and not is_dataset:
    arg_name = 'steps_per_epoch' if mode == ModeKeys.TRAIN else 'steps'
    raise ValueError('Please specify the number of steps via the '
                     '`{}` argument.'.format(arg_name))

  val_gen = (
      data_utils.is_generator_or_sequence(validation_data) or
      isinstance(validation_data, iterator_ops.IteratorV2))
  if (val_gen and not isinstance(validation_data, data_utils.Sequence) and
      not validation_steps):
    raise ValueError('Please specify the `validation_steps` argument.')

  if any(k != 'steps' for k in kwargs):
    raise ValueError('Invalid arguments passed: {}'.format(
        [k for k in kwargs if k != 'steps']))
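
# Example (illustrative only, not from the original module): with a plain
# generator and no `steps_per_epoch`, validation fails because the step count
# cannot be inferred.
#
#   _validate_arguments(
#       is_sequence=False, is_dataset=False, use_multiprocessing=False,
#       workers=1, steps_per_epoch=None, validation_data=None,
#       validation_steps=None, mode=ModeKeys.TRAIN, kwargs={})
#   # -> ValueError: Please specify the number of steps via the
#   #    `steps_per_epoch` argument.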
def fit_generator(model,
                  generator,
                  steps_per_epoch=None,
                  epochs=1,
                  verbose=1,
                  callbacks=None,
                  validation_data=None,
                  validation_steps=None,
                  class_weight=None,
                  max_queue_size=10,
                  workers=1,
                  use_multiprocessing=False,
                  shuffle=True,
                  initial_epoch=0):
  """See docstring for `Model.fit_generator`."""
  epoch = initial_epoch

  do_validation = bool(validation_data)
  if not context.executing_eagerly():
    model._make_train_function()
    if do_validation:
      model._make_test_function()

  is_sequence = isinstance(generator, data_utils.Sequence)
  if not is_sequence and use_multiprocessing and workers > 1:
    logging.warning(
        UserWarning('Using a generator with `use_multiprocessing=True`'
                    ' and multiple workers may duplicate your data.'
                    ' Please consider using the `keras.utils.Sequence`'
                    ' class.'))
  if steps_per_epoch is None:
    if is_sequence:
      steps_per_epoch = len(generator)
    else:
      raise ValueError('Please specify the `steps_per_epoch` argument.')

  if (isinstance(validation_data, dataset_ops.Dataset) and
      context.executing_eagerly()):
    validation_data = validation_data.make_one_shot_iterator()

  val_gen = (data_utils.is_generator_or_sequence(validation_data) or
             isinstance(validation_data, iterator_ops.EagerIterator))
  if (val_gen and not isinstance(validation_data, data_utils.Sequence) and
      not validation_steps):
    raise ValueError('Please specify the `validation_steps` argument.')

  enqueuer = None
  val_enqueuer = None

  try:
    val_x, val_y, val_sample_weights = validation_data, None, None
    if do_validation and not val_gen:
      # Prepare data for validation.
      if len(validation_data) == 2:
        val_x, val_y = validation_data  # pylint: disable=unpacking-non-sequence
        val_sample_weights = None
      elif len(validation_data) == 3:
        val_x, val_y, val_sample_weights = validation_data  # pylint: disable=unpacking-non-sequence
      else:
        raise ValueError('`validation_data` should be a tuple '
                         '`(val_x, val_y, val_sample_weight)` '
                         'or `(val_x, val_y)`. Found: ' + str(validation_data))
      val_x, val_y, val_sample_weights = model._standardize_user_data(
          val_x, val_y, val_sample_weights)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        val_inputs=val_x,
        val_targets=val_y,
        val_sample_weights=val_sample_weights,
        epochs=epochs,
        validation_steps=validation_steps,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose)

    if workers > 0:
      # With workers, batches are prefetched on background threads/processes.
      if is_sequence:
        enqueuer = data_utils.OrderedEnqueuer(
            generator,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle)
      else:
        enqueuer = data_utils.GeneratorEnqueuer(
            generator, use_multiprocessing=use_multiprocessing)
      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
      output_generator = enqueuer.get()
    else:
      if is_sequence:
        output_generator = data_utils.iter_sequence_infinite(generator)
      else:
        output_generator = generator

    callbacks.on_train_begin()
    # Construct epoch logs.
    epoch_logs = {}
    while epoch < epochs:
      for m in model.metrics:
        m.reset_states()
      callbacks.on_epoch_begin(epoch)
      steps_done = 0
      batch_index = 0
      while steps_done < steps_per_epoch:
        generator_output = next(output_generator)

        if not hasattr(generator_output, '__len__'):
          raise ValueError('Output of generator should be '
                           'a tuple `(x, y, sample_weight)` '
                           'or `(x, y)`. Found: ' + str(generator_output))

        if len(generator_output) == 2:
          x, y = generator_output
          sample_weight = None
        elif len(generator_output) == 3:
          x, y, sample_weight = generator_output
        else:
          raise ValueError('Output of generator should be '
                           'a tuple `(x, y, sample_weight)` '
                           'or `(x, y)`. Found: ' + str(generator_output))

        # Build batch logs.
        batch_logs = {}
        if isinstance(x, list):
          batch_size = x[0].shape[0]
        elif isinstance(x, dict):
          batch_size = list(x.values())[0].shape[0]
        else:
          batch_size = x.shape[0]
        batch_logs['batch'] = int(batch_index)
        batch_logs['size'] = int(batch_size)
        callbacks.on_batch_begin(batch_index, batch_logs)

        outs = model.train_on_batch(
            x, y, sample_weight=sample_weight, class_weight=class_weight)

        if not isinstance(outs, list):
          outs = [outs]
        for l, o in zip(model.metrics_names, outs):
          batch_logs[l] = o

        callbacks.on_batch_end(batch_index, batch_logs)

        batch_index += 1
        steps_done += 1

        # Epoch finished.
        if steps_done >= steps_per_epoch and do_validation:
          if val_gen:
            val_outs = evaluate_generator(
                model,
                validation_data,
                validation_steps,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                max_queue_size=max_queue_size)
          else:
            # No need for try/except because
            # data has already been validated.
            val_outs = model.evaluate(
                val_x,
                val_y,
                batch_size=batch_size,
                sample_weight=val_sample_weights,
                verbose=0)
          if not isinstance(val_outs, list):
            val_outs = [val_outs]
          # Same labels assumed.
          for l, o in zip(model.metrics_names, val_outs):
            epoch_logs['val_' + l] = o

        if callbacks.model.stop_training:
          break

      callbacks.on_epoch_end(epoch, epoch_logs)
      epoch += 1
      if callbacks.model.stop_training:
        break

  except (errors.OutOfRangeError, StopIteration):
    logging.warning(
        'Your dataset iterator ran out of data; interrupting training. '
        'Make sure that your dataset can generate at least `steps_per_epoch` '
        'batches (in this case, %d batches). You may need to use the '
        'repeat() function when building your dataset.', steps_per_epoch)

  finally:
    try:
      if enqueuer is not None:
        enqueuer.stop()
    finally:
      if val_enqueuer is not None:
        val_enqueuer.stop()

  callbacks.on_train_end()
  return model.history
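
# Example (a minimal sketch, not part of the original module): training a
# compiled model from a plain Python generator. `make_model()` is a
# hypothetical helper standing in for any compiled `tf.keras.Model`.
#
#   def batches(batch_size=8):
#     while True:  # fit_generator pulls `steps_per_epoch` batches per epoch
#       x = np.random.random((batch_size, 4))
#       y = np.random.randint(0, 2, size=(batch_size, 1))
#       yield x, y
#
#   model = make_model()  # hypothetical: returns a compiled binary classifier
#   history = fit_generator(model, batches(), steps_per_epoch=10, epochs=2)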