def test_calculating_input_params_no_steps_no_batch_size(self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    # Input samples of different sizes
    input_20_samples = np.zeros((20, 3), dtype=np.float32)
    input_63_samples = np.zeros((63, 3), dtype=np.float32)
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # Default global batch size 32 for input with 64 samples run in 2 steps
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=None)
    self.assertEqual(batch_size, 32 // replica_scale_factor)
    self.assertEqual(steps, 2)

    # Computed global batch size 20 is lower than 32 if we pass fewer samples.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_20_samples, steps=None, batch_size=None)
    self.assertEqual(batch_size, 20 // replica_scale_factor)
    self.assertEqual(steps, 1)

    # Default global batch size 32 cannot be used with 63 samples.
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=None, batch_size=None)
def test_calculating_input_params_with_steps_with_batch_size(
    self, distribution):
  with self.cached_session():
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # No change to steps and batch size if both specified and feasible
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=5, batch_size=3)
    self.assertEqual(batch_size, 3)
    self.assertEqual(steps, 5)

    # Number of samples is less than global batch size * steps
    with self.assertRaisesRegexp(ValueError, 'less than samples required'):
      distributed_training_utils.get_input_params(
          distribution, input_64_samples, steps=10, batch_size=13)
def predict_distributed(model,
                        x=None,
                        batch_size=None,
                        verbose=0,
                        steps=None,
                        callbacks=None):
  """Predict loop for Distribution Strategies."""
  distributed_training_utils.validate_inputs(
      x, None, model._distribution_strategy)
  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, first_x_value, steps, batch_size)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
  dataset = model._distribution_standardize_user_data(
      x,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps',
      steps=steps)
  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    # TODO(fchollet): why aren't callbacks supported here?
    return experimental_tpu_predict_loop(
        model, dataset, verbose=verbose, steps=steps)
  else:
    return training_arrays.predict_loop(
        model,
        dataset,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)
def predict_distributed(model,
                        x=None,
                        batch_size=None,
                        verbose=0,
                        steps=None,
                        callbacks=None):
  """Predict loop for Distribution Strategies."""
  distributed_training_utils.validate_inputs(
      x, None, model._distribution_strategy, allow_partial_batch=True)
  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, first_x_value, steps, batch_size,
        mode=ModeKeys.PREDICT)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
  dataset = model._distribution_standardize_user_data(
      x,
      batch_size=batch_size,
      repeat=False,
      allow_partial_batch=True)
  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_predict_loop(
        model, dataset, verbose=verbose, steps=steps, callbacks=callbacks)
  else:
    return training_arrays.predict_loop(
        model,
        dataset,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)
def evaluate_distributed(model,
                         x=None,
                         y=None,
                         batch_size=None,
                         verbose=1,
                         sample_weight=None,
                         steps=None,
                         callbacks=None):
  """Evaluate loop for Distribution Strategies."""
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)
  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, first_x_value, steps, batch_size)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      batch_size=batch_size)
  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_test_loop(
        model, dataset, verbose=verbose, steps=steps, callbacks=callbacks)
  else:
    return training_arrays.test_loop(
        model,
        inputs=dataset,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)
def predict_distributed(model,
                        x=None,
                        batch_size=None,
                        verbose=0,
                        steps=None,
                        callbacks=None):
  """Predict loop for Distribution Strategies."""
  distributed_training_utils.validate_inputs(
      x, None, model._distribution_strategy)
  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, first_x_value, steps, batch_size)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
  iterator = model._distribution_standardize_user_data(
      x,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps',
      steps=steps)
  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    # TODO(fchollet): why aren't callbacks supported here?
    return experimental_tpu_predict_loop(
        model, iterator, verbose=verbose, steps=steps)
  else:
    return training_arrays.predict_loop(
        model,
        iterator,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)
def test_calculating_input_params_no_steps_with_batch_size(
    self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # Computed steps is correct for specified batch size
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=16)
    self.assertEqual(batch_size, 16)
    self.assertEqual(steps, 4 // replica_scale_factor)

    # Computed steps is correct for specified batch size
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=32)
    self.assertEqual(batch_size, 32)
    self.assertEqual(steps, 2 // replica_scale_factor)

    # Number of samples is not divisible by the global batch size
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_64_samples, steps=None, batch_size=20)

    # Number of samples is not divisible by the global batch size
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_64_samples, steps=None, batch_size=3)
def test_calculating_input_params_with_steps_no_batch_size(
    self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    # Input samples of different sizes
    input_63_samples = np.zeros((63, 3), dtype=np.float32)
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # Computed global batch size is correct for the specified 1 step
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=1, batch_size=None)
    self.assertEqual(batch_size, 64 // replica_scale_factor)
    self.assertEqual(steps, 1)

    # Computed global batch size is correct for the specified 2 steps
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=2, batch_size=None)
    self.assertEqual(batch_size, 32 // replica_scale_factor)
    self.assertEqual(steps, 2)

    # All samples cannot be consumed in the specified number of steps
    with self.assertRaisesRegexp(ValueError, 'not divisible by steps'):
      distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=2, batch_size=None)

    # This case differs between strategies because the supported batch size
    # may be either global or per-replica.
    if replica_scale_factor == 1:
      # Computed global batch size is correct even if it is not shardable
      steps, batch_size = distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=3, batch_size=None)
      self.assertEqual(batch_size, 21)
      self.assertEqual(steps, 3)
    else:
      # Computed global batch size cannot be sharded across replicas
      with self.assertRaisesRegexp(ValueError, 'could not be sharded evenly '
                                   'across the sync replicas'):
        distributed_training_utils.get_input_params(
            distribution, input_63_samples, steps=1, batch_size=None)
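# The sketch below is not part of the Keras source; it is a minimal,
# self-contained approximation of the steps/batch-size inference behaviour
# that the tests above exercise. It ignores the per-replica scaling factor
# and reuses the error messages asserted in the tests; the real logic lives
# in distributed_training_utils.get_input_params, and the helper name here
# is hypothetical.
def _sketch_get_input_params(num_samples, steps=None, batch_size=None):
  """Infers (steps, global_batch_size) for `num_samples` (rough sketch)."""
  default_global_batch_size = 32
  if steps is None and batch_size is None:
    # Fall back to the default global batch size, shrunk if there are fewer
    # samples than the default.
    batch_size = min(num_samples, default_global_batch_size)
  if batch_size is None:
    # Derive the global batch size from the requested number of steps.
    if num_samples % steps:
      raise ValueError('The number of samples is not divisible by steps.')
    batch_size = num_samples // steps
  if steps is None:
    # Derive the number of steps from the requested global batch size.
    if num_samples % batch_size:
      raise ValueError('The number of samples is not divisible by batch size.')
    steps = num_samples // batch_size
  if batch_size * steps > num_samples:
    raise ValueError('Number of samples is less than samples required.')
  return steps, batch_size


# Example: 64 samples with neither steps nor batch size specified yields the
# default global batch size of 32 consumed in 2 steps.
assert _sketch_get_input_params(64) == (2, 32)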
def evaluate_distributed(model,
                         x=None,
                         y=None,
                         batch_size=None,
                         verbose=1,
                         sample_weight=None,
                         steps=None,
                         callbacks=None):
  """Evaluate loop for Distribution Strategies."""
  # TODO(b/122314600): Remove the scope validate.
  distributed_training_utils.validate_not_in_strategy_scope()
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)
  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, first_x_value, steps, batch_size)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)
  iterator = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps',
      steps=steps)
  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    # TODO(fchollet): why aren't callbacks supported here?
    return experimental_tpu_test_loop(
        model, iterator=iterator, verbose=verbose, steps=steps)
  else:
    return training_arrays.test_loop(
        model,
        inputs=iterator,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    # Until support for partial batch is implemented across all
    # functions and distribution strategy, we pass `mode` to selectively
    # relax the constraint to consume all the training samples.
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, mode=ModeKeys.TRAIN))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  steps_name = 'steps_per_epoch'
  if isinstance(x, dataset_ops.DatasetV2):
    steps_per_epoch = training_utils.infer_steps_for_dataset(
        x, steps_per_epoch, steps_name=steps_name)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      check_steps=True,
      steps_name=steps_name,
      steps=steps_per_epoch,
      validation_split=validation_split,
      shuffle=shuffle)

  val_dataset = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size)
    steps_name = 'validation_steps'
    if isinstance(val_x, dataset_ops.DatasetV2):
      validation_steps = training_utils.infer_steps_for_dataset(
          val_x, validation_steps, steps_name=steps_name)
    val_dataset = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        check_steps=True,
        steps_name=steps_name,
        steps=validation_steps,
        validation_split=validation_split,
        shuffle=shuffle)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        dataset,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_dataset=val_dataset,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=1)
  else:
    return training_arrays.fit_loop(
        model,
        dataset,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_dataset,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq)
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    # Until support for partial batch is implemented across all
    # functions and distribution strategy, we pass `mode` to selectively
    # relax the constraint to consume all the training samples.
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, mode=ModeKeys.TRAIN))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps_per_epoch',
      steps=steps_per_epoch,
      validation_split=validation_split,
      shuffle=shuffle)

  val_dataset = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size)
    val_dataset = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        check_steps=True,
        steps_name='validation_steps',
        steps=validation_steps,
        validation_split=validation_split,
        shuffle=shuffle)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        dataset,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_dataset=val_dataset,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=1)
  else:
    return training_arrays.fit_loop(
        model,
        dataset,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_dataset,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq)
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, is_training=True))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  iterator = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps_per_epoch',
      steps=steps_per_epoch,
      validation_split=validation_split,
      shuffle=shuffle)

  val_iterator = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size)
    val_iterator = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        check_steps=True,
        steps_name='validation_steps',
        steps=validation_steps,
        validation_split=validation_split,
        shuffle=shuffle)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        iterator,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_iterator=val_iterator,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps)
  else:
    return training_arrays.fit_loop(
        model,
        iterator,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_iterator,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps)
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies."""
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  first_x_value = nest.flatten(x)[0]
  if isinstance(first_x_value, np.ndarray):
    steps_per_epoch, batch_size = (
        distributed_training_utils.get_input_params(
            model._distribution_strategy, first_x_value, steps_per_epoch,
            batch_size, is_training=True))
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  dataset = model._distribution_standardize_user_data(
      x, y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      check_steps=True,
      steps_name='steps_per_epoch',
      steps=steps_per_epoch,
      validation_split=validation_split,
      shuffle=shuffle)

  val_dataset = None
  if validation_data:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    first_valx_value = nest.flatten(val_x)[0]
    if isinstance(first_valx_value, np.ndarray):
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy, first_valx_value, validation_steps,
          batch_size)
    val_dataset = model._distribution_standardize_user_data(
        val_x, val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        check_steps=True,
        steps_name='validation_steps',
        steps=validation_steps,
        validation_split=validation_split,
        shuffle=shuffle)
  elif validation_split:
    raise ValueError('validation_split argument is not supported with '
                     'distribution strategies.')

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        dataset,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_dataset=val_dataset,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=1)
  else:
    return training_arrays.fit_loop(
        model,
        dataset,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        val_inputs=val_dataset,
        shuffle=shuffle,
        initial_epoch=initial_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_freq=validation_freq)
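# Hedged usage sketch (not part of the module above): these distributed loops
# are normally reached through the public Keras API rather than called
# directly. The exact dispatch path depends on the TensorFlow version; the
# snippet below assumes a tf.distribute.MirroredStrategy scope with a
# tf.keras model, under which fit/evaluate/predict route through loops like
# fit_distributed, evaluate_distributed, and predict_distributed.
import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
  model.compile(optimizer='sgd', loss='mse')

x = np.zeros((64, 3), dtype=np.float32)
y = np.zeros((64, 1), dtype=np.float32)

# With a strategy active, Keras dispatches these calls to the distributed
# training, evaluation, and prediction loops defined above.
model.fit(x, y, epochs=1, batch_size=32)
model.evaluate(x, y, batch_size=32)
model.predict(x, batch_size=32)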