def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
    x = tf.constant([1.0])
    with strategy_fn().scope():
        with policy.policy_scope("mixed_float16"):
            # Test on MultiplyLayer
            layer = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
            )
            layer(x)
            (regularizer_loss,) = layer.losses
            self.assertEqual(regularizer_loss.dtype, tf.float32)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(regularizer_loss), 1.0)

            # Test on MultiplyLayerWithoutAutoCast
            layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
            )
            layer(x)
            (regularizer_loss,) = layer.losses
            self.assertEqual(regularizer_loss.dtype, tf.float32)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(regularizer_loss), 1.0)
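
# For reference, a minimal sketch of what mp_test_util.IdentityRegularizer is
# assumed to do: it returns the variable unchanged, so the regularization loss
# equals the variable's value (1.0 here) and is computed on the float32
# variable rather than in the float16 compute dtype. The class name below is
# hypothetical.
class _IdentityRegularizerSketch(tf.keras.regularizers.Regularizer):
    def __call__(self, x):
        # Regularizers receive the raw (non-auto-cast) variable, which stays
        # float32 under mixed_float16.
        assert x.dtype == tf.float32
        return tf.identity(x)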
def test_layer_with_non_autocast_variable(self, strategy_fn):
    x = tf.constant([1.0])
    with strategy_fn().scope():
        with policy.policy_scope("mixed_float16"):
            layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16
            )
            y = layer(x)
            self.assertEqual(layer.v.dtype, tf.float32)
            self.assertEqual(y.dtype, tf.float16)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(y), 1.0)
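
# A minimal sketch of the behavior test_layer_with_non_autocast_variable
# relies on, assuming MultiplyLayerWithoutAutoCast opts out of auto-casting
# via add_weight(..., experimental_autocast=False). The variable then reads as
# float32 even under mixed_float16, so the layer must cast it manually to the
# compute dtype. The class name below is hypothetical.
class _MultiplyWithoutAutoCastSketch(tf.keras.layers.Layer):
    def build(self, input_shape):
        del input_shape
        self.v = self.add_weight(
            "v",
            shape=(),
            initializer="ones",
            dtype=tf.float32,
            experimental_autocast=False,
        )

    def call(self, inputs):
        # Without auto-casting, self.v stays float32 while inputs are float16.
        assert self.v.dtype == tf.float32
        return inputs * tf.cast(self.v, inputs.dtype)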
def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
    # The advanced model tests mixed-precision-related features that would
    # occur in a resnet50 model. It tests a model that has:
    #  * Multiple layers, some of which use auto-cast variables and some of
    #    which do not.
    #  * Regularization on some variables and not others.
    #  * A fixed loss scale (if use_loss_scaling is True).
    strategy = strategy_fn()
    if use_loss_scaling:
        loss_scale = 8.0
    learning_rate = 2**-14

    with strategy.scope():
        with policy.policy_scope(policy.Policy("mixed_float16")):
            x = layers.Input(shape=(1,), batch_size=2)
            layer1 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=True,
            )
            layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16, use_operator=True
            )
            layer3 = mp_test_util.MultiplyLayer(
                assert_type=tf.float16, use_operator=False
            )
            layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=tf.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=False,
            )
            y = layer1(x)
            y = layer2(y)
            y = layer3(y)
            y = layer4(y)
            if use_loss_scaling:
                # The gradient of 'y' at this point is 1. With loss scaling,
                # the gradient is 'loss_scale'. We divide by the batch size
                # of 2 since the loss is averaged across batch elements.
                expected_gradient = loss_scale / 2
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=[expected_gradient],
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = gradient_descent.SGD(learning_rate)
            if use_loss_scaling:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=loss_scale
                )
            model.compile(
                opt,
                loss=loss_fn,
                run_eagerly=test_utils.should_run_eagerly(),
            )

    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    for layer in (layer1, layer2, layer3, layer4):
        if layer.losses:
            # Layer has weight regularizer
            self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
        else:
            # Layer does not have weight regularizer
            self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
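
# A minimal sketch, assuming mp_test_util.create_identity_with_grad_check_fn
# is built on tf.custom_gradient: the forward pass is the identity, and the
# backward pass asserts the dtype and value of the incoming (loss-scaled)
# gradient before passing it through. With a fixed loss scale of 8 and
# reduce_mean over a batch of 2, the gradient checked above is 8 / 2 = 4.
# The function name below is hypothetical.
def _create_identity_with_grad_check_fn_sketch(
    expected_gradient, expected_dtype=None
):
    @tf.custom_gradient
    def _identity_with_grad_check(x):
        if expected_dtype:
            assert x.dtype == expected_dtype

        def grad(dx):
            # Verify the scaled gradient flowing backward, then forward it
            # unchanged.
            expected = tf.constant(expected_gradient, dtype=dx.dtype)
            with tf.control_dependencies(
                [tf.debugging.assert_equal(dx, expected)]
            ):
                return tf.identity(dx)

        return tf.identity(x), grad

    return _identity_with_grad_check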