def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
    x = tf.constant([1.0])
    with strategy_fn().scope():
        with policy.policy_scope("mixed_float16"):
            # Test on MultiplyLayer
            layer = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
            )
            layer(x)
            (regularizer_loss,) = layer.losses
            self.assertEqual(regularizer_loss.dtype, tf.float32)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(regularizer_loss), 1.0)

            # Test on MultiplyLayerWithoutAutoCast
            layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
            )
            layer(x)
            (regularizer_loss,) = layer.losses
            self.assertEqual(regularizer_loss.dtype, tf.float32)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(regularizer_loss), 1.0)
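# For reference, a minimal sketch of the regularizer the test above relies
# on. This is an assumption inferred from the assertions (the real helper
# lives in mp_test_util): IdentityRegularizer returns its argument
# unchanged, so with MultiplyLayer's variable initialized to 1.0 the
# regularizer loss evaluates to exactly 1.0, and it is computed on the
# float32 variable rather than on the float16 autocast copy.
class _IdentityRegularizerSketch(tf.keras.regularizers.Regularizer):
    def __call__(self, x):
        # Under mixed_float16, regularizers must see the variable dtype
        # (float32), never the compute dtype (float16).
        assert x.dtype == tf.float32
        return tf.identity(x)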
def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
    # The advanced model tests mixed-precision-related features that would
    # occur in a resnet50 model. It tests a model that has:
    #   * Multiple layers, some of which use auto-cast variables and some
    #     of which do not.
    #   * Regularization on some variables and not others.
    #   * A fixed loss scale (if use_loss_scaling is True).
    strategy = strategy_fn()
    if use_loss_scaling:
        loss_scale = 8.0
    learning_rate = 2**-14

    with strategy.scope():
        with policy.policy_scope(policy.Policy("mixed_float16")):
            x = layers.Input(shape=(1,), batch_size=2)
            layer1 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=True,
            )
            layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16, use_operator=True
            )
            layer3 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16, use_operator=False
            )
            layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=False,
            )
            y = layer1(x)
            y = layer2(y)
            y = layer3(y)
            y = layer4(y)
            if use_loss_scaling:
                # The gradient of 'y' at this point is 1. With loss
                # scaling, the gradient is 'loss_scale'. We divide by the
                # batch size of 2 since the loss is averaged across batch
                # elements.
                expected_gradient = loss_scale / 2
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=[expected_gradient],
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = gradient_descent.SGD(learning_rate)
            if use_loss_scaling:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=loss_scale
                )
            model.compile(
                opt,
                loss=loss_fn,
                run_eagerly=test_utils.should_run_eagerly(),
            )

    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    for layer in (layer1, layer2, layer3, layer4):
        if layer.losses:
            # Layer has a weight regularizer.
            self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
        else:
            # Layer does not have a weight regularizer.
            self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
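# A small standalone sketch of the gradient arithmetic the test above
# checks, using the same assumed numbers (fixed loss scale of 8.0, batch
# size of 2): the loss is a mean over 2 batch elements, so each element's
# gradient is 1/2, and scaling the loss by 8 scales every gradient by 8,
# giving loss_scale / 2 == 4 per element.
def _sketch_scaled_gradient():
    y = tf.ones((2, 1))
    with tf.GradientTape() as tape:
        tape.watch(y)
        scaled_loss = 8.0 * tf.reduce_mean(y)
    grad = tape.gradient(scaled_loss, y)
    # grad == [[4.0], [4.0]], i.e. loss_scale / batch_size.
    return grad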
def test_model(
    self,
    strategy_fn,
    use_operator=False,
    use_regularizer=False,
    policy_name="mixed_float16",
    get_config=False,
    save_format=None,
    use_input_spec=False,
):
    self._skip_if_strategy_unsupported(strategy_fn)
    self._skip_if_save_format_unsupported(save_format)
    if use_regularizer:
        weight_regularizer = mp_test_util.IdentityRegularizer()
        activity_regularizer = mp_test_util.ReduceSumRegularizer()
    else:
        weight_regularizer = activity_regularizer = None

    with strategy_fn().scope():
        with policy.policy_scope(policy_name):
            layer = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                use_operator=use_operator,
                regularizer=weight_regularizer,
                activity_regularizer=activity_regularizer,
                input_shape=(1,),
            )
            if use_input_spec:
                layer.input_spec = input_spec.InputSpec(shape=(None, 1))
            model = test_utils.get_model_from_layers(
                [layer], input_shape=(1,), input_dtype=tf.float16
            )
            if get_config:
                config = model.get_config()
                model = model.__class__.from_config(
                    config,
                    custom_objects={
                        "MultiplyLayer": mp_test_util.MultiplyLayer
                    },
                )
                (layer,) = (
                    layer
                    for layer in model.layers
                    if isinstance(layer, mp_test_util.MultiplyLayer)
                )

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            # The learning rate is small enough that, if it were applied to
            # a float16 variable, the variable would not change. So this
            # tests that the learning rate is applied to the float32
            # variable, not to a float16 value.
            opt = gradient_descent.SGD(2**-14)
            # Use a fixed loss scale, as this test will fail if gradients
            # are skipped for a step due to dynamic loss scaling.
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=8
            )
            model.compile(
                opt,
                loss=loss_fn,
                run_eagerly=test_utils.should_run_eagerly(),
            )

    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    # The variable starts at 1, and should have a gradient of 2 ** -14
    # subtracted from it.
    expected = 1 - 2**-14
    if use_regularizer:
        # The weight and activity regularizers each add another 2 ** -14
        # to the gradient.
        expected -= 2 * 2**-14
    self.assertEqual(backend.eval(layer.v), expected)

    if save_format:
        with generic_utils.CustomObjectScope(
            {
                "MultiplyLayer": mp_test_util.MultiplyLayer,
                "loss_fn": loss_fn,
            }
        ):
            self._test_saving(model, dataset, save_format, use_regularizer)
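# A standalone sketch of the float16 rounding argument in the comment
# above: an update of 2**-14 is well below the float16 spacing just below
# 1.0 (2**-11), so subtracting it from a float16 value rounds back to 1.0,
# while the float32 master variable does change. Uses numpy (np), which
# the tests above already use.
def _sketch_float16_update_is_lost():
    lr_update = 2**-14
    # In float16 the update vanishes entirely...
    assert np.float16(1) - np.float16(lr_update) == np.float16(1)
    # ...but in float32 it is preserved, which is why mixed precision
    # keeps a float32 copy of each variable for the optimizer to update.
    assert np.float32(1) - np.float32(lr_update) < np.float32(1)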