def test_config_policy_v1(self, strategy_fn):
  x = tf.constant([1.], dtype=tf.float16)
  with strategy_fn().scope():
    layer = mp_test_util.MultiplyLayer(
        dtype=policy.PolicyV1('mixed_float16', loss_scale=None))
    config = layer.get_config()
    self.assertEqual(config['dtype'],
                     {'class_name': 'PolicyV1',
                      'config': {'name': 'mixed_float16',
                                 'loss_scale': None}})
    layer = mp_test_util.MultiplyLayer.from_config(config)
    self.assertEqual(layer.dtype, 'float32')
    self.assertEqual(layer(x).dtype, 'float16')
    self.assertEqual(layer.v.dtype, 'float32')
    # Restoring a PolicyV1 silently converts it to a Policy and drops the
    # loss scale.
    self.assertEqual(type(layer.dtype_policy), policy.Policy)
    config = layer.get_config()
    # The loss_scale is silently dropped
    self.assertEqual(config['dtype'],
                     {'class_name': 'Policy',
                      'config': {'name': 'mixed_float16'}})

    layer = mp_test_util.MultiplyLayer(
        dtype=policy.PolicyV1('float64', loss_scale=2.))
    config = layer.get_config()
    self.assertEqual(config['dtype'],
                     {'class_name': 'PolicyV1',
                      'config': {'name': 'float64',
                                 'loss_scale': {
                                     'class_name': 'FixedLossScale',
                                     'config': {'loss_scale_value': 2.0}}}})
    layer = mp_test_util.MultiplyLayer.from_config(config)
    self.assertEqual(layer.dtype, 'float64')
    self.assertEqual(layer(x).dtype, 'float64')
    self.assertEqual(layer.v.dtype, 'float64')
    self.assertEqual(type(layer.dtype_policy), policy.Policy)
    config = layer.get_config()
    self.assertEqual(config['dtype'], 'float64')

    layer = mp_test_util.MultiplyLayer(
        dtype=policy.PolicyV1('_infer', loss_scale=2.))
    config = layer.get_config()
    self.assertEqual(config['dtype'],
                     {'class_name': 'PolicyV1',
                      'config': {'name': '_infer',
                                 'loss_scale': {
                                     'class_name': 'FixedLossScale',
                                     'config': {'loss_scale_value': 2.0}}}})
    layer = mp_test_util.MultiplyLayer.from_config(config)
    self.assertEqual(layer.dtype, None)
    self.assertEqual(layer(x).dtype, 'float16')
    self.assertEqual(layer.v.dtype, 'float16')
    self.assertEqual(type(layer.dtype_policy), policy.Policy)
    config = layer.get_config()
    self.assertEqual(config['dtype'], 'float16')
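# A condensed sketch (not itself a test) of the round trip checked above,
# assuming the same `policy` and `mp_test_util` modules: a layer built with a
# PolicyV1 comes back from its config with a plain Policy, so the loss scale
# does not survive a save/load cycle.
def _sketch_policy_v1_round_trip():
  layer = mp_test_util.MultiplyLayer(
      dtype=policy.PolicyV1('mixed_float16', loss_scale=None))
  restored = mp_test_util.MultiplyLayer.from_config(layer.get_config())
  # The restored layer's policy is a plain Policy, not a PolicyV1.
  assert type(restored.dtype_policy) is policy.Policy
  # Serializing again now records a plain Policy with no loss scale.
  assert restored.get_config()['dtype'] == {
      'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}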
def test_repr(self):
  # Test Policy repr
  for policy in ('float32', 'int8', 'mixed_float16', 'mixed_bfloat16',
                 '_infer'):
    self.assertEqual(repr(mp_policy.Policy(policy)),
                     '<Policy "%s">' % policy)

  # Test PolicyV1 repr
  for policy in ('float32', 'int8', 'mixed_bfloat16', '_infer'):
    self.assertEqual(repr(mp_policy.PolicyV1(policy)),
                     '<PolicyV1 "%s", loss_scale=None>' % policy)
  self.assertEqual(
      repr(mp_policy.PolicyV1('float16', loss_scale=2)),
      '<PolicyV1 "float16", loss_scale=FixedLossScale(2.0)>')
  self.assertStartsWith(
      repr(mp_policy.PolicyV1('mixed_float16')),
      '<PolicyV1 "mixed_float16", loss_scale=DynamicLossScale(')
def test_loss_scale_warning(self):
  with tf.compat.v1.test.mock.patch.object(tf_logging,
                                           'warning') as mock_warn:
    mp_policy.PolicyV1('float32', loss_scale=2.)
    self.assertEqual(
        mock_warn.call_args_list[0][0][0],
        'Creating a Policy with a loss scale is only useful for float16 '
        'policies. You passed loss_scale=2.0 for policy float32. Consider '
        'not passing any loss_scale instead.')

  for policy_name in 'float16', 'mixed_float16':
    # Trigger any other warnings that occur only once
    mp_policy.PolicyV1(policy_name, loss_scale=2.)
    with tf.compat.v1.test.mock.patch.object(tf_logging,
                                             'warning') as mock_warn:
      mp_policy.PolicyV1(policy_name, loss_scale=2.)
      mock_warn.assert_not_called()
def test_policy_v1_without_loss_scale(self):
  with policy.policy_scope(policy.PolicyV1('mixed_float16',
                                           loss_scale=None)):
    opt = gradient_descent.SGD(1.)
    x = layers.Input(shape=(1,))
    y = mp_test_util.MultiplyLayer()(x)
    model = models.Model(x, y)
    model.compile(opt, loss='mse')
    self.assertNotIsInstance(model.optimizer,
                             loss_scale_optimizer.LossScaleOptimizer)
def test_loss_scale_optimizer_overrides_policy_v1_loss_scale(self):
  with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)):
    opt = gradient_descent.SGD(1.)
    opt = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False,
                                                  initial_scale=5.)
    x = layers.Input(shape=(1,))
    y = mp_test_util.MultiplyLayer()(x)
    model = models.Model(x, y)
    model.compile(opt, loss='mse')
    self.assertEqual(self.evaluate(model.optimizer.loss_scale), 5.)
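# A brief sketch of the precedence rule the test above relies on, assuming
# the same modules: an explicitly constructed LossScaleOptimizer keeps its
# own scale even when the active PolicyV1 carries a different loss scale.
def _sketch_explicit_loss_scale_wins():
  with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)):
    opt = loss_scale_optimizer.LossScaleOptimizer(
        gradient_descent.SGD(1.), dynamic=False, initial_scale=5.)
    # Once a model is compiled with `opt`, its fixed scale of 5 is used,
    # not the policy's 10, as the test above asserts.
    return opt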
def test_pass_invalid_optimizer_with_loss_scaling(self):
  with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)):
    x = layers.Input(shape=(1,))
    y = mp_test_util.MultiplyLayer()(x)
    model = models.Model(x, y)
    if tf.executing_eagerly():
      error_msg = 'Use a `tf.keras` Optimizer instead'
    else:
      error_msg = 'optimizer" must be an instance of '
    with self.assertRaisesRegex(ValueError, error_msg):
      model.compile(optimizer_v1.SGD(1.), 'mse')
def test_loss_scale(self):
  policy = mp_policy.PolicyV1('float32')
  self.assertEqual(policy.loss_scale, None)

  policy = mp_policy.PolicyV1('float32', loss_scale=None)
  self.assertEqual(policy.loss_scale, None)

  ls = tf.mixed_precision.experimental.DynamicLossScale()
  policy = mp_policy.PolicyV1('float32', loss_scale=ls)
  self.assertIs(policy.loss_scale, ls)

  policy = mp_policy.PolicyV1('float32', loss_scale='dynamic')
  self.assertIsInstance(policy.loss_scale,
                        tf.mixed_precision.experimental.DynamicLossScale)

  policy = mp_policy.PolicyV1('mixed_float16')
  self.assertIsInstance(policy.loss_scale,
                        tf.mixed_precision.experimental.DynamicLossScale)

  policy = mp_policy.PolicyV1('mixed_float16', loss_scale=None)
  self.assertEqual(policy.loss_scale, None)

  policy = mp_policy.PolicyV1('mixed_bfloat16')
  self.assertEqual(policy.loss_scale, None)
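# A compact sketch of the PolicyV1 loss_scale defaults exercised above:
# 'mixed_float16' defaults to dynamic loss scaling, while float policies
# that need no scaling (including 'mixed_bfloat16') default to None.
def _sketch_loss_scale_defaults():
  assert mp_policy.PolicyV1('float32').loss_scale is None
  assert mp_policy.PolicyV1('mixed_bfloat16').loss_scale is None
  assert isinstance(mp_policy.PolicyV1('mixed_float16').loss_scale,
                    tf.mixed_precision.experimental.DynamicLossScale)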
def test_serialization(self):
  # Test policies that are equivalent to a single dtype
  for policy_name in 'float16', 'float32', 'int8', 'string', 'bool':
    policy = mp_policy.Policy(policy_name)
    config = mp_policy.serialize(policy)
    self.assertEqual(config, policy_name)
    new_policy = mp_policy.deserialize(config)
    self.assertEqual(str(policy), str(new_policy))

  # Test "_infer" policy
  policy = mp_policy.Policy('_infer')
  config = mp_policy.serialize(policy)
  self.assertIsNone(config)
  new_policy = mp_policy.deserialize(config)
  self.assertEqual(str(policy), str(new_policy))

  class MyPolicy(mp_policy.Policy):
    pass

  # Test policies that are not equivalent to a single dtype
  for policy in (mp_policy.Policy('mixed_float16'),
                 mp_policy.Policy('mixed_bfloat16'),
                 MyPolicy('float32')):
    config = mp_policy.serialize(policy)
    self.assertEqual(
        config, {
            'class_name': policy.__class__.__name__,
            'config': {
                'name': policy.name
            }
        })
    new_policy = mp_policy.deserialize(
        config, custom_objects={'MyPolicy': MyPolicy})
    self.assertEqual(str(policy), str(new_policy))

  # Test V1 policies that override the loss scale
  for policy in (
      mp_policy.PolicyV1('float32', loss_scale=2.),
      mp_policy.PolicyV1('float32', loss_scale=None),
      mp_policy.PolicyV1('mixed_float16', loss_scale=2.),
      mp_policy.PolicyV1('mixed_float16', loss_scale=None),
      mp_policy.PolicyV1('mixed_bfloat16', loss_scale=2.),
      mp_policy.PolicyV1('mixed_bfloat16', loss_scale=None),
  ):
    config = mp_policy.serialize(policy)
    expected_loss_scale_config = None
    if policy.loss_scale:
      expected_loss_scale_config = {
          'class_name': 'FixedLossScale',
          'config': {
              'loss_scale_value': 2.
          }
      }
    self.assertEqual(
        config, {
            'class_name': policy.__class__.__name__,
            'config': {
                'name': policy.name,
                'loss_scale': expected_loss_scale_config
            }
        })
def test_dynamic_loss_scaling(self,
                              strategy_fn,
                              pass_loss_scale_to_policy=False,
                              get_config=False,
                              use_v1_loss_scale_optimizer=False):
  strategy = strategy_fn()
  initial_loss_scale = 2.
  batch_size = 4
  expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                       dtype=tf.float16)
  # If this variable is set to True, the model below will have NaN gradients
  have_nan_gradients = backend.variable(False, dtype=tf.bool)
  with strategy.scope():
    opt = gradient_descent.SGD(1.)
    if pass_loss_scale_to_policy:
      loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
          initial_loss_scale=initial_loss_scale, increment_period=2)
      p = policy.PolicyV1('mixed_float16', loss_scale=loss_scale)
    elif use_v1_loss_scale_optimizer:
      loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
          initial_loss_scale=initial_loss_scale, increment_period=2)
      p = policy.Policy('mixed_float16')
      opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
    else:
      p = policy.Policy('mixed_float16')
      opt = loss_scale_optimizer.LossScaleOptimizer(
          opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2)
    with policy.policy_scope(p):
      x = layers.Input(shape=(1,), batch_size=batch_size, dtype=tf.float16)
      layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
      y = layer(x)
      identity_with_nan_grads = (
          mp_test_util.create_identity_with_nan_gradients_fn(
              have_nan_gradients))
      y = core.Lambda(identity_with_nan_grads)(y)
      identity_with_grad_check_fn = (
          mp_test_util.create_identity_with_grad_check_fn(
              expected_dtype=tf.float16,
              expected_gradient=expected_gradient))
      y = core.Lambda(identity_with_grad_check_fn)(y)
      model = models.Model(inputs=x, outputs=y)
      if get_config:
        config = model.get_config()
        model = model.__class__.from_config(
            config,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (layer,) = (layer for layer in model.layers
                    if isinstance(layer, mp_test_util.MultiplyLayer))

      def loss_fn(y_true, y_pred):
        del y_true
        return tf.reduce_mean(y_pred)

      model.compile(
          opt,
          loss=loss_fn,
          run_eagerly=testing_utils.should_run_eagerly())

  self.assertEqual(backend.eval(layer.v), 1)
  x = np.ones((batch_size, 1))
  y = np.ones((batch_size, 1))
  dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
  model.fit(dataset)
  # The variable starts at 1 and has a gradient of 1, so it will go down by
  # 1 each step.
  self.assertEqual(backend.eval(layer.v), 0)
  model.fit(dataset)
  self.assertEqual(backend.eval(layer.v), -1)

  # There have been two steps without NaNs, so the loss scale will double
  backend.set_value(expected_gradient,
                    backend.get_value(expected_gradient * 2))
  model.fit(dataset)
  self.assertEqual(backend.eval(layer.v), -2)

  # Next test with NaN gradients.
  backend.set_value(have_nan_gradients, True)
  model.fit(dataset)
  # Variable should not be updated
  self.assertEqual(backend.eval(layer.v), -2)

  # Test with finite gradients again
  backend.set_value(have_nan_gradients, False)
  # The loss scale will be halved due to the NaNs, so the gradient will also
  # be halved
  backend.set_value(expected_gradient,
                    backend.get_value(expected_gradient / 2))
  model.fit(dataset)
  self.assertEqual(backend.eval(layer.v), -3)
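# A plain-Python sketch (a hypothetical helper, not TF's implementation) of
# the dynamic loss-scale update rule the assertions above depend on: the
# scale doubles after `growth_steps` consecutive finite steps and halves
# whenever a step produces non-finite gradients.
def _sketch_dynamic_scale_update(scale, good_steps, step_is_finite,
                                 growth_steps=2):
  if not step_is_finite:
    return scale / 2., 0  # non-finite gradients: halve scale, reset counter
  good_steps += 1
  if good_steps >= growth_steps:
    return scale * 2., 0  # enough consecutive good steps: double the scale
  return scale, good_steps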