def test_state_structure(self):
  """Checks a fresh RMSprop state contains exactly the expected keys."""
  optimizer = rmsprop.build_rmsprop(0.01)
  state = optimizer.initialize(_SCALAR_SPEC)
  expected_keys = (
      optimizer_base.LEARNING_RATE_KEY,
      rmsprop._DECAY_KEY,
      rmsprop._EPSILON_KEY,
      rmsprop._PRECONDITIONER_KEY,
  )
  self.assertLen(state, len(expected_keys))
  for key in expected_keys:
    self.assertIn(key, state)
def test_executes_with(self, spec):
  """Runs ten optimizer steps on `spec`-shaped weights; results stay finite."""

  def ones_like_spec(s):
    return tf.ones(s.shape, s.dtype)

  weights = tf.nest.map_structure(ones_like_spec, spec)
  gradients = tf.nest.map_structure(ones_like_spec, spec)
  optimizer = rmsprop.build_rmsprop(0.01)
  state = optimizer.initialize(spec)
  for _ in range(10):
    state, weights = optimizer.next(state, weights, gradients)

  def assert_finite(w):
    self.assertTrue(all(tf.math.is_finite(w)))

  tf.nest.map_structure(assert_finite, weights)
def test_convergence(self):
  """RMSprop drives a quadratic objective close to its minimum."""
  init_w, fn, grad_fn = optimizer_test_utils.test_quadratic_problem()
  weights = init_w()
  # The starting point must be far from the optimum for the test to mean
  # anything.
  self.assertGreater(fn(weights), 5.0)

  optimizer = rmsprop.build_rmsprop(0.02)
  state = optimizer.initialize(tf.TensorSpec(weights.shape, weights.dtype))
  for _ in range(200):
    state, weights = optimizer.next(state, weights, grad_fn(weights))
  self.assertLess(fn(weights), 0.005)
class IntegrationTest(tf.test.TestCase, parameterized.TestCase):
  """Cross-environment consistency tests for the optimizer implementations.

  Each optimizer is run in three environments — eager mode, inside a TF
  computation, and inside a federated computation — and the resulting weight
  histories are required to match numerically.
  """

  @parameterized.named_parameters(
      # Each case pairs a freshly built optimizer with a weight-structure spec.
      ('adagrad_scalar', adagrad.build_adagrad(0.1), _SCALAR_SPEC),
      ('adagrad_struct', adagrad.build_adagrad(0.1), _STRUCT_SPEC),
      ('adagrad_nested', adagrad.build_adagrad(0.1), _NESTED_SPEC),
      ('adam_scalar', adam.build_adam(0.1), _SCALAR_SPEC),
      ('adam_struct', adam.build_adam(0.1), _STRUCT_SPEC),
      ('adam_nested', adam.build_adam(0.1), _NESTED_SPEC),
      ('rmsprop_scalar', rmsprop.build_rmsprop(0.1), _SCALAR_SPEC),
      ('rmsprop_struct', rmsprop.build_rmsprop(0.1), _STRUCT_SPEC),
      ('rmsprop_nested', rmsprop.build_rmsprop(0.1), _NESTED_SPEC),
      ('scheduled_sgd_scalar', _scheduled_sgd(), _SCALAR_SPEC),
      ('scheduled_sgd_struct', _scheduled_sgd(), _STRUCT_SPEC),
      ('scheduled_sgd_nested', _scheduled_sgd(), _NESTED_SPEC),
      ('sgd_scalar', sgdm.build_sgdm(0.1), _SCALAR_SPEC),
      ('sgd_struct', sgdm.build_sgdm(0.1), _STRUCT_SPEC),
      ('sgd_nested', sgdm.build_sgdm(0.1), _NESTED_SPEC),
      ('sgdm_scalar', sgdm.build_sgdm(0.1, 0.9), _SCALAR_SPEC),
      ('sgdm_struct', sgdm.build_sgdm(0.1, 0.9), _STRUCT_SPEC),
      ('sgdm_nested', sgdm.build_sgdm(0.1, 0.9), _NESTED_SPEC),
      ('yogi_scalar', yogi.build_yogi(0.1), _SCALAR_SPEC),
      ('yogi_struct', yogi.build_yogi(0.1), _STRUCT_SPEC),
      ('yogi_nested', yogi.build_yogi(0.1), _NESTED_SPEC),
  )
  def test_integration_produces_identical_results(self, optimizer, spec):
    eager_history = _run_in_eager_mode(optimizer, spec)
    tf_comp_history = _run_in_tf_computation(optimizer, spec)
    federated_comp_history = _run_in_federated_computation(optimizer, spec)
    # Loose tolerances absorb float32 noise across the three environments.
    self.assertAllClose(eager_history, tf_comp_history, rtol=1e-5, atol=1e-5)
    self.assertAllClose(
        eager_history, federated_comp_history, rtol=1e-5, atol=1e-5)
def test_executes_with_indexed_slices(self):
  """Gradients represented as `tf.IndexedSlices` are handled correctly.

  TF can represent gradients as tf.IndexedSlices; the optimizer must
  support that representation, updating only the rows named in `indices`.
  """
  weights = tf.ones([4, 2])
  gradients = tf.IndexedSlices(
      values=tf.constant([[1.0, 1.0], [1.0, 1.0]]),
      indices=tf.constant([0, 2]),
      dense_shape=tf.constant([4, 2]))
  # decay=0.0 keeps the preconditioner history-free, which simplifies the
  # expected values below.
  optimizer = rmsprop.build_rmsprop(0.5, decay=0.0)
  state = optimizer.initialize(tf.TensorSpec([4, 2]))
  _, updated_weights = optimizer.next(state, weights, gradients)
  expected = [[0.5, 0.5], [1.0, 1.0], [0.5, 0.5], [1.0, 1.0]]
  self.assertAllClose(expected, updated_weights)
class ScheduledLROptimizerTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for wrapping optimizers with a learning-rate schedule."""

  def test_scheduled_sgd_computes_correctly(self):
    scheduled_sgd = scheduling.schedule_learning_rate(
        sgdm.build_sgdm(1.0), _example_schedule_fn)
    weight = tf.constant(1.0)
    gradient = tf.constant(1.0)
    state = scheduled_sgd.initialize(tf.TensorSpec((), tf.float32))
    # The schedule starts at learning rate 0.1 (steps of 0.1) and then drops
    # to 0.01 (steps of 0.01) from the third update on.
    for expected_weight in (0.9, 0.8, 0.79, 0.78):
      state, weight = scheduled_sgd.next(state, weight, gradient)
      self.assertAllClose(expected_weight, weight)

  @parameterized.named_parameters(
      ('adagrad', adagrad.build_adagrad(1.0)),
      ('adam', adam.build_adam(1.0)),
      ('rmsprop', rmsprop.build_rmsprop(1.0)),
      ('sgd', sgdm.build_sgdm(1.0)),
      ('sgdm', sgdm.build_sgdm(1.0, momentum=0.9)),
      ('yogi', yogi.build_yogi(1.0)),
  )
  def test_schedule_learning_rate_integrates_with(self, optimizer):
    wrapped = scheduling.schedule_learning_rate(optimizer,
                                                _example_schedule_fn)
    self.assertIsInstance(wrapped, optimizer_base.Optimizer)

  def test_keras_optimizer_raises(self):
    # Only TFF optimizers may be scheduled; Keras optimizers are rejected.
    with self.assertRaises(TypeError):
      scheduling.schedule_learning_rate(
          tf.keras.optimizers.SGD(1.0), _example_schedule_fn)

  def test_scheduling_scheduled_optimizer_raises(self):
    once_scheduled = scheduling.schedule_learning_rate(
        sgdm.build_sgdm(1.0), _example_schedule_fn)
    twice_scheduled = scheduling.schedule_learning_rate(
        once_scheduled, _example_schedule_fn)
    with self.assertRaisesRegex(KeyError, 'must have learning rate'):
      twice_scheduled.initialize(tf.TensorSpec((), tf.float32))
def test_math(self):
  """Checks RMSprop updates against hand-computed expected weights."""
  optimizer = rmsprop.build_rmsprop(
      learning_rate=0.01, decay=0.5, epsilon=0.0)
  weights = tf.constant([1.0], tf.float32)
  gradients = tf.constant([2.0], tf.float32)
  state = optimizer.initialize(_SCALAR_SPEC)

  history = [weights]
  for _ in range(4):
    state, weights = optimizer.next(state, weights, gradients)
    history.append(weights)

  expected_history = [
      [1.0],         # w0
      [0.98585784],  # w1 = w0 - 0.01 * 2.0 / sqrt(2.0)
      [0.97431080],  # w2 = w1 - 0.01 * 2.0 / sqrt(3.0)
      [0.96362036],  # w3 = w2 - 0.01 * 2.0 / sqrt(3.5)
      [0.95329240],  # w4 = w3 - 0.01 * 2.0 / sqrt(3.75)
  ]
  self.assertAllClose(expected_history, history)
def test_match_keras(self):
  """TFF RMSprop matches `tf.keras.optimizers.RMSprop` numerically.

  Runs both implementations for several steps from identical random
  initial weights with identical random gradients, then asserts the
  trajectories are numerically close.
  """
  weight_spec = [
      tf.TensorSpec([10, 2], tf.float32),
      tf.TensorSpec([2], tf.float32),
  ]
  steps = 10
  # Fixed seed keeps the comparison deterministic across runs.
  # (Fixes misspelled locals: "genarator" -> generator,
  # "intial_weight" -> initial_weights.)
  generator = tf.random.Generator.from_seed(2021)

  def random_vector():
    return [
        generator.normal(shape=s.shape, dtype=s.dtype) for s in weight_spec
    ]

  initial_weights = random_vector()
  model_variables_fn = lambda: [tf.Variable(v) for v in initial_weights]
  gradients = [random_vector() for _ in range(steps)]
  tff_optimizer_fn = lambda: rmsprop.build_rmsprop(0.01, decay=0.9)
  keras_optimizer_fn = lambda: tf.keras.optimizers.RMSprop(0.01, rho=0.9)
  self.assert_optimizers_numerically_close(model_variables_fn, gradients,
                                           tff_optimizer_fn,
                                           keras_optimizer_fn)
class MimeLiteClientWorkExecutionTest(tf.test.TestCase,
                                      parameterized.TestCase):
  """Execution tests for the Mime Lite client work process."""

  @parameterized.named_parameters(('non-simulation', False),
                                  ('simulation', True))
  @mock.patch.object(
      dataset_reduce,
      '_dataset_reduce_fn',
      wraps=dataset_reduce._dataset_reduce_fn)
  @tensorflow_test_utils.skip_test_for_multi_gpu
  def test_client_tf_dataset_reduce_fn(self, simulation, mock_method):
    # The mock on `_dataset_reduce_fn` records whether the reduce path was
    # taken; the simulation loop is expected to bypass it.
    process = mime._build_mime_lite_client_work(
        model_fn=_create_model,
        optimizer=sgdm.build_sgdm(learning_rate=0.1, momentum=0.9),
        client_weighting=client_weight_lib.ClientWeighting.NUM_EXAMPLES,
        use_experimental_simulation_loop=simulation)
    client_data = [_create_dataset()]
    client_model_weights = [_initial_weights()]
    process.next(process.initialize(), client_model_weights, client_data)
    if simulation:
      mock_method.assert_not_called()
    else:
      mock_method.assert_called()

  @parameterized.named_parameters(
      ('adagrad', adagrad.build_adagrad(0.1)),
      ('adam', adam.build_adam(0.1)),
      ('rmsprop', rmsprop.build_rmsprop(0.1)),
      ('sgd', sgdm.build_sgdm(0.1)),
      ('sgdm', sgdm.build_sgdm(0.1, momentum=0.9)),
      ('yogi', yogi.build_yogi(0.1)))
  @tensorflow_test_utils.skip_test_for_multi_gpu
  def test_execution_with_optimizer(self, optimizer):
    # One round over a single client should process 8 examples — presumably
    # the size of `_create_dataset()`; verify against that helper.
    process = mime._build_mime_lite_client_work(
        _create_model,
        optimizer,
        client_weighting=client_weight_lib.ClientWeighting.NUM_EXAMPLES)
    client_data = [_create_dataset()]
    client_model_weights = [_initial_weights()]
    state = process.initialize()
    output = process.next(state, client_model_weights, client_data)
    self.assertEqual(8, output.measurements['train']['num_examples'])

  @tensorflow_test_utils.skip_test_for_multi_gpu
  def test_custom_metrics_aggregator(self):

    def sum_then_finalize_then_times_two(metric_finalizers,
                                         local_unfinalized_metrics_type):
      # Custom aggregator: sums unfinalized metrics across clients,
      # finalizes them, and doubles every finalized value so its effect is
      # observable in the assertion below.

      @federated_computation.federated_computation(
          computation_types.at_clients(local_unfinalized_metrics_type))
      def aggregation_computation(client_local_unfinalized_metrics):
        unfinalized_metrics_sum = intrinsics.federated_sum(
            client_local_unfinalized_metrics)

        @tensorflow_computation.tf_computation(local_unfinalized_metrics_type)
        def finalizer_computation(unfinalized_metrics):
          finalized_metrics = collections.OrderedDict()
          for metric_name, metric_finalizer in metric_finalizers.items():
            finalized_metrics[metric_name] = metric_finalizer(
                unfinalized_metrics[metric_name]) * 2
          return finalized_metrics

        return intrinsics.federated_map(finalizer_computation,
                                        unfinalized_metrics_sum)

      return aggregation_computation

    process = mime._build_mime_lite_client_work(
        model_fn=_create_model,
        optimizer=sgdm.build_sgdm(learning_rate=0.01, momentum=0.9),
        client_weighting=client_weight_lib.ClientWeighting.NUM_EXAMPLES,
        metrics_aggregator=sum_then_finalize_then_times_two)
    client_model_weights = [_initial_weights()]
    client_data = [_create_dataset()]
    output = process.next(process.initialize(), client_model_weights,
                          client_data)
    # Train metrics should be multiplied by two by the custom aggregator.
    self.assertEqual(output.measurements['train']['num_examples'], 16)
def test_initialize_next_weights_mismatch_raises(self):
  """`next` rejects weights whose shape differs from the initialized spec."""
  optimizer = rmsprop.build_rmsprop(0.1)
  # State is built for a scalar, but a rank-1 tensor is passed to `next`.
  state = optimizer.initialize(_SCALAR_SPEC)
  with self.assertRaises(ValueError):
    optimizer.next(state, tf.zeros([2]), tf.zeros([2]))
def test_invalid_args_raises(self, lr, decay, epsilon, regex):
  """Builder rejects out-of-range hyperparameters with the expected error."""
  with self.assertRaisesRegex(ValueError, regex):
    rmsprop.build_rmsprop(learning_rate=lr, decay=decay, epsilon=epsilon)
def test_build_rmsprop(self):
  """The factory returns an `optimizer_base.Optimizer` instance."""
  self.assertIsInstance(
      rmsprop.build_rmsprop(0.01), optimizer_base.Optimizer)