def test_construction_calls_model_fn(self):
  # Assert that the process building does not call `model_fn` too many
  # times. `model_fn` can potentially be expensive (loading weights,
  # processing, etc).
  mock_model_fn = mock.Mock(side_effect=TestModel)
  federated_evaluation.build_federated_evaluation(mock_model_fn)
  # TODO(b/186451541): reduce the number of calls to model_fn.
  self.assertEqual(mock_model_fn.call_count, 2)
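# Note: `TestModel` is defined elsewhere in this module and not shown in this
# excerpt. From the type signatures and results asserted in the tests below,
# it exposes a single trainable `max_temp` variable, no non-trainable
# variables, consumes batches of the form {'temp': float32[?]}, and reports a
# `num_over` metric counting how many `temp` values exceed `max_temp`. The
# input spec this implies (inferred here, not the actual definition) would be:
#
#   collections.OrderedDict(temp=tf.TensorSpec(shape=[None], dtype=tf.float32))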
def test_federated_evaluation_dataset_reduce(self, simulation, mock_method):
  evaluate_comp = federated_evaluation.build_federated_evaluation(
      _model_fn_from_keras, use_experimental_simulation_loop=simulation)
  initial_weights = tf.nest.map_structure(
      lambda x: x.read_value(),
      model_utils.ModelWeights.from_model(_model_fn_from_keras()))

  def _input_dict(temps):
    return collections.OrderedDict([
        ('x', np.reshape(np.array(temps, dtype=np.float32), (-1, 1))),
        ('y', np.reshape(np.array(temps, dtype=np.float32), (-1, 1))),
    ])

  evaluate_comp(
      initial_weights,
      [[_input_dict([1.0, 10.0, 2.0, 7.0]), _input_dict([6.0, 11.0])],
       [_input_dict([9.0, 12.0, 13.0])],
       [_input_dict([1.0]), _input_dict([22.0, 23.0])]])

  if simulation:
    mock_method.assert_not_called()
  else:
    mock_method.assert_called()
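# Note: `test_federated_evaluation_dataset_reduce` above (and
# `test_federated_evaluation_simulation_loop` below) receive `simulation` and
# `mock_method` from decorators that are not shown in this excerpt. A typical
# arrangement, assuming an internal `dataset_reduce` helper is the patch
# target (the exact target and names are assumptions), would look like:
#
#   @parameterized.named_parameters(('non_simulation', False),
#                                   ('simulation', True))
#   @mock.patch.object(
#       dataset_reduce,
#       '_dataset_reduce_fn',
#       wraps=dataset_reduce._dataset_reduce_fn)
#   def test_federated_evaluation_dataset_reduce(self, simulation, mock_method):
#     ...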
def test_federated_evaluation_with_keras(self, simulation):
  evaluate_comp = federated_evaluation.build_federated_evaluation(
      _model_fn_from_keras, use_experimental_simulation_loop=simulation)
  initial_weights = tf.nest.map_structure(
      lambda x: x.read_value(),
      model_utils.ModelWeights.from_model(_model_fn_from_keras()))

  def _input_dict(temps):
    return collections.OrderedDict(
        x=np.reshape(np.array(temps, dtype=np.float32), (-1, 1)),
        y=np.reshape(np.array(temps, dtype=np.float32), (-1, 1)))

  result = evaluate_comp(
      initial_weights,
      [[_input_dict([1.0, 10.0, 2.0, 7.0]), _input_dict([6.0, 11.0])],
       [_input_dict([9.0, 12.0, 13.0])],
       [_input_dict([1.0]), _input_dict([22.0, 23.0])]])
  # Expect 100% accuracy and no loss because we've constructed the identity
  # function and have the same x's and y's for training data.
  self.assertDictEqual(
      result,
      collections.OrderedDict(
          eval=collections.OrderedDict(accuracy=1.0, loss=0.0),
          stat=collections.OrderedDict(num_examples=12)))
def test_federated_evaluation(self):
  evaluate = federated_evaluation.build_federated_evaluation(TestModel)
  model_weights_type = model_utils.weights_type_from_model(TestModel)
  type_test_utils.assert_types_equivalent(
      evaluate.type_signature,
      FunctionType(
          parameter=StructType([
              ('server_model_weights',
               computation_types.at_server(model_weights_type)),
              ('federated_dataset',
               computation_types.at_clients(
                   SequenceType(
                       StructType([('temp',
                                    TensorType(dtype=tf.float32,
                                               shape=[None]))])))),
          ]),
          result=computation_types.at_server(
              collections.OrderedDict(
                  eval=collections.OrderedDict(num_over=tf.float32)))))

  def _temp_dict(temps):
    return {'temp': np.array(temps, dtype=np.float32)}

  result = evaluate(
      collections.OrderedDict(trainable=[5.0], non_trainable=[]), [
          [_temp_dict([1.0, 10.0, 2.0, 7.0]), _temp_dict([6.0, 11.0])],
          [_temp_dict([9.0, 12.0, 13.0])],
          [_temp_dict([1.0]), _temp_dict([22.0, 23.0])],
      ])
  self.assertEqual(
      result,
      collections.OrderedDict(eval=collections.OrderedDict(num_over=9.0)))
def test_federated_evaluation_fails_stateful_broadcast(self):
  # Create a test stateful measured process that doesn't do anything useful.
  @computations.federated_computation
  def init_fn():
    return intrinsics.federated_eval(
        computations.tf_computation(
            lambda: tf.zeros(shape=[], dtype=tf.float32)),
        placements.SERVER)

  @computations.federated_computation(
      computation_types.at_server(tf.float32),
      computation_types.at_clients(tf.int32))
  def next_fn(state, value):
    return measured_process.MeasuredProcessOutput(state, value, state)

  broadcaster = measured_process.MeasuredProcess(init_fn, next_fn)
  with self.assertRaisesRegex(ValueError, 'stateful broadcast'):
    federated_evaluation.build_federated_evaluation(
        TestModelQuant, broadcast_process=broadcaster)
def test_federated_evaluation_with_keras(self):

  def model_fn():
    keras_model = tf.keras.Sequential([
        tf.keras.layers.Dense(
            1,
            kernel_initializer='ones',
            bias_initializer='zeros',
            activation=None)
    ], name='my_model')
    keras_model.compile(
        loss='mean_squared_error',
        optimizer='sgd',
        metrics=[tf.keras.metrics.Accuracy()])
    return keras_utils.from_compiled_keras_model(
        keras_model,
        dummy_batch={
            'x': np.zeros((1, 1), np.float32),
            'y': np.zeros((1, 1), np.float32)
        })

  evaluate_comp = federated_evaluation.build_federated_evaluation(model_fn)
  initial_weights = tf.nest.map_structure(
      lambda x: x.read_value(),
      model_utils.enhance(model_fn()).weights)

  def _input_dict(temps):
    return {
        'x': np.reshape(np.array(temps, dtype=np.float32), (-1, 1)),
        'y': np.reshape(np.array(temps, dtype=np.float32), (-1, 1))
    }

  result = evaluate_comp(
      initial_weights,
      [[_input_dict([1.0, 10.0, 2.0, 7.0]), _input_dict([6.0, 11.0])],
       [_input_dict([9.0, 12.0, 13.0])],
       [_input_dict([1.0]), _input_dict([22.0, 23.0])]])
  # Expect 100% accuracy and no loss because we've constructed the identity
  # function and have the same x's and y's for training data.
  self.assertEqual(str(result), '<accuracy=1.0,loss=0.0>')
def test_federated_evaluation_simulation_loop(self, mock_method):
  evaluate_comp = federated_evaluation.build_federated_evaluation(
      _model_fn_from_keras, use_experimental_simulation_loop=True)
  initial_weights = tf.nest.map_structure(
      lambda x: x.read_value(),
      model_utils.ModelWeights.from_model(_model_fn_from_keras()))

  def _input_dict(temps):
    return collections.OrderedDict(
        x=np.reshape(np.array(temps, dtype=np.float32), (-1, 1)),
        y=np.reshape(np.array(temps, dtype=np.float32), (-1, 1)))

  evaluate_comp(
      initial_weights,
      [[_input_dict([1.0, 10.0, 2.0, 7.0]), _input_dict([6.0, 11.0])]])
  mock_method.assert_not_called()
def test_federated_evaluation(self):
  evaluate = federated_evaluation.build_federated_evaluation(TestModel)
  self.assertEqual(
      str(evaluate.type_signature),
      '(<<trainable=<max_temp=float32>,non_trainable=<>>@SERVER,'
      '{<temp=float32[?]>*}@CLIENTS> -> <num_over=float32@SERVER>)')

  def _temp_dict(temps):
    return {'temp': np.array(temps, dtype=np.float32)}

  result = evaluate(
      {
          'trainable': {
              'max_temp': 5.0
          },
          'non_trainable': {}
      },
      [[_temp_dict([1.0, 10.0, 2.0, 7.0]), _temp_dict([6.0, 11.0])],
       [_temp_dict([9.0, 12.0, 13.0])],
       [_temp_dict([1.0]), _temp_dict([22.0, 23.0])]])
  self.assertEqual(str(result), '<num_over=9.0>')
def test_federated_evaluation_with_keras(self):

  def model_fn():
    keras_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(1,)),
        tf.keras.layers.Dense(
            1,
            kernel_initializer='ones',
            bias_initializer='zeros',
            activation=None)
    ], name='my_model')
    return keras_utils.from_keras_model(
        keras_model,
        input_spec=collections.OrderedDict(
            x=tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            y=tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
        ),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.Accuracy()])

  evaluate_comp = federated_evaluation.build_federated_evaluation(model_fn)
  initial_weights = tf.nest.map_structure(
      lambda x: x.read_value(),
      model_utils.enhance(model_fn()).weights)

  def _input_dict(temps):
    return collections.OrderedDict([
        ('x', np.reshape(np.array(temps, dtype=np.float32), (-1, 1))),
        ('y', np.reshape(np.array(temps, dtype=np.float32), (-1, 1))),
    ])

  result = evaluate_comp(
      initial_weights,
      [[_input_dict([1.0, 10.0, 2.0, 7.0]), _input_dict([6.0, 11.0])],
       [_input_dict([9.0, 12.0, 13.0])],
       [_input_dict([1.0]), _input_dict([22.0, 23.0])]])
  # Expect 100% accuracy and no loss because we've constructed the identity
  # function and have the same x's and y's for training data.
  self.assertEqual(
      str(result),
      '<accuracy=1.0,loss=0.0,keras_training_time_client_sum_sec=0.0>')
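# Several tests above reference a module-level `_model_fn_from_keras` helper
# that is not shown in this excerpt. Below is a minimal sketch consistent with
# the inline `model_fn` in `test_federated_evaluation_with_keras` above (an
# identity one-unit Dense layer, mean squared error loss, and an Accuracy
# metric); the actual definition used by the tests is an assumption.
def _model_fn_from_keras():
  keras_model = tf.keras.Sequential([
      tf.keras.layers.Dense(
          1,
          kernel_initializer='ones',
          bias_initializer='zeros',
          activation=None,
          input_shape=(1,))
  ])
  return keras_utils.from_keras_model(
      keras_model,
      input_spec=collections.OrderedDict(
          x=tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
          y=tf.TensorSpec(shape=(None, 1), dtype=tf.float32)),
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.Accuracy()])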
def test_federated_evaluation(self):
  evaluate = federated_evaluation.build_federated_evaluation(TestModel)
  self.assertEqual(
      str(evaluate.type_signature),
      '(<<trainable=<float32>,non_trainable=<>>@SERVER,'
      '{<temp=float32[?]>*}@CLIENTS> -> <num_over=float32@SERVER>)')

  def _temp_dict(temps):
    return {'temp': np.array(temps, dtype=np.float32)}

  result = evaluate(
      collections.OrderedDict([
          ('trainable', [5.0]),
          ('non_trainable', []),
      ]), [
          [_temp_dict([1.0, 10.0, 2.0, 7.0]), _temp_dict([6.0, 11.0])],
          [_temp_dict([9.0, 12.0, 13.0])],
          [_temp_dict([1.0]), _temp_dict([22.0, 23.0])],
      ])
  self.assertEqual(result, collections.OrderedDict(num_over=9.0))
def test_federated_evaluation_quantized_aggressively(self):
  # Set up a uniform quantization encoder as the broadcaster.
  broadcaster = (
      encoding_utils.build_encoded_broadcast_process_from_model(
          TestModelQuant, _build_simple_quant_encoder(2)))
  self.assert_types_equivalent(broadcaster.next.type_signature,
                               _build_expected_broadcaster_next_signature())

  evaluate = federated_evaluation.build_federated_evaluation(
      TestModelQuant, broadcast_process=broadcaster)
  # Confirm that the type signature matches what is expected.
  self.assert_types_identical(
      evaluate.type_signature,
      _build_expected_test_quant_model_eval_signature())

  def _temp_dict(temps):
    return {'temp': np.array(temps, dtype=np.float32)}

  result = evaluate(
      collections.OrderedDict(
          trainable=[[5.0, 10.0, 5.0, 7.0]], non_trainable=[]),
      [
          [
              _temp_dict([1.0, 10.0, 2.0, 7.0]),
              _temp_dict([6.0, 11.0, 5.0, 8.0])
          ],
          [_temp_dict([9.0, 12.0, 13.0, 7.0])],
          [
              _temp_dict([1.0, 22.0, 23.0, 24.0]),
              _temp_dict([5.0, 10.0, 5.0, 7.0])
          ],
      ])
  # This very aggressive quantization should be so lossy that some of the
  # data is changed during encoding, so the number of entries that match
  # between the original data and the result should be less than the 8
  # observed in the conservative quantization test.
  self.assertContainsSubset(result.keys(), ['eval', 'stat'])
  self.assertContainsSubset(result['eval'].keys(), ['num_same'])
  self.assertLess(result['eval']['num_same'], 8.0)
  self.assertContainsSubset(result['stat'].keys(), ['num_examples'])
  self.assertEqual(result['stat']['num_examples'], 20)
def test_federated_evaluation_quantized_conservatively(self):
  # Set up a uniform quantization encoder as the broadcaster.
  broadcaster = (
      encoding_utils.build_encoded_broadcast_process_from_model(
          TestModelQuant, _build_simple_quant_encoder(12)))
  type_test_utils.assert_types_equivalent(
      broadcaster.next.type_signature,
      _build_expected_broadcaster_next_signature())

  evaluate = federated_evaluation.build_federated_evaluation(
      TestModelQuant, broadcast_process=broadcaster)
  # Confirm that the type signature matches what is expected.
  type_test_utils.assert_types_identical(
      evaluate.type_signature,
      _build_expected_test_quant_model_eval_signature())

  def _temp_dict(temps):
    return {'temp': np.array(temps, dtype=np.float32)}

  result = evaluate(
      collections.OrderedDict(
          trainable=[[5.0, 10.0, 5.0, 7.0]], non_trainable=[]),
      [
          [
              _temp_dict([1.0, 10.0, 2.0, 7.0]),
              _temp_dict([6.0, 11.0, 5.0, 8.0])
          ],
          [_temp_dict([9.0, 12.0, 13.0, 7.0])],
          [
              _temp_dict([1.0, 22.0, 23.0, 24.0]),
              _temp_dict([5.0, 10.0, 5.0, 7.0])
          ],
      ])
  # This conservative quantization should not be too lossy.
  # When comparing the data examples to trainable, there are 8 times
  # where the index and value match.
  self.assertEqual(
      result,
      collections.OrderedDict(eval=collections.OrderedDict(num_same=8.0)))
def test_federated_evaluation_fails_non_measured_process_broadcast(self):
  broadcaster = computations.tf_computation(lambda x: x)
  with self.assertRaisesRegex(ValueError, '`MeasuredProcess`'):
    federated_evaluation.build_federated_evaluation(
        TestModelQuant, broadcast_process=broadcaster)
def test_no_unsecure_aggregation_with_secure_metrics_finalizer(self):
  evaluate_comp = federated_evaluation.build_federated_evaluation(
      _model_fn_from_keras,
      metrics_aggregator=aggregator.secure_sum_then_finalize)
  static_assert.assert_not_contains_unsecure_aggregation(evaluate_comp)