def test_raises_non_tff_computation_get_model_weights_attribute(self):
  """Expects a compatibility error for a process built from plain methods.

  The process under test subclasses `tff.templates.IterativeProcess` but
  defines `initialize`, `next` and `get_model_weights` as ordinary Python
  methods that return empty dicts.
  """

  class BadIterativeProcess(tff.templates.IterativeProcess):
    """Iterative process whose members are plain Python callables."""

    def __init__(self):
      # The parent constructor is intentionally not invoked.
      pass

    def initialize(self):
      return {}

    def next(self, state, data):
      return {}

    def get_model_weights(self, state):
      return {}

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def empty_metrics(model, round_num):
    del model, round_num  # Unused.
    return {}

  run_kwargs = dict(
      iterative_process=BadIterativeProcess(),
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=empty_metrics,
      total_rounds=10,
      experiment_name='bad_iterative_process')

  with self.assertRaisesRegex(
      training_loop.IterativeProcessCompatibilityError,
      _COMPATIBILITY_ERROR_MESSAGE):
    training_loop.run(**run_kwargs)
def test_fn_writes_metrics(self):
  """Runs one round with a `test_fn` and inspects the written CSV metrics.

  The run uses `total_rounds=1` and `rounds_per_eval=10`; afterwards the CSV
  file is expected to hold two metric rows and a `test/loss` column.
  """
  experiment_name = 'test_metrics'
  process = _build_federated_averaging_process()

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def keras_test_loss(model):
    # Load the weights into a fresh compiled Keras model and evaluate it on
    # the first batch of a client dataset.
    keras_model = _compiled_keras_model_builder()
    model.assign_weights_to(keras_model)
    first_batch = next(iter(_create_tf_dataset_for_client(5)))
    loss = keras_model.evaluate(first_batch['x'], first_batch['y'])
    return {'loss': loss}

  output_root = self.get_temp_dir()
  training_loop.run(
      iterative_process=process,
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=_evaluation_fn(),
      total_rounds=1,
      experiment_name=experiment_name,
      root_output_dir=output_root,
      rounds_per_eval=10,
      test_fn=keras_test_loss)

  results_dir = os.path.join(output_root, 'results', experiment_name)
  csv_path = os.path.join(results_dir, 'experiment.metrics.csv')
  fieldnames, metrics = tff.simulation.CSVMetricsManager(
      csv_path).get_metrics()

  self.assertLen(metrics, 2)
  self.assertIn('test/loss', fieldnames)
def test_fedavg_training_decreases_loss(self):
  """Checks that one training round yields a lower loss than the initial model."""
  process = _build_federated_averaging_process()

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def keras_loss(model):
    # Evaluate the given weights on the first batch of a client dataset.
    keras_model = _compiled_keras_model_builder()
    model.assign_weights_to(keras_model)
    first_batch = next(iter(_create_tf_dataset_for_client(5)))
    loss = keras_model.evaluate(first_batch['x'], first_batch['y'])
    return {'loss': loss}

  # Capture the untrained weights so they can be compared after training.
  initial_model = process.get_model_weights(process.initialize())

  output_root = self.get_temp_dir()
  final_state = training_loop.run(
      iterative_process=process,
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=_evaluation_fn(),
      total_rounds=1,
      experiment_name='fedavg_decreases_loss',
      root_output_dir=output_root)
  final_model = process.get_model_weights(final_state)

  self.assertLess(keras_loss(final_model)['loss'],
                  keras_loss(initial_model)['loss'])
def test_raises_non_callable_evaluate_fn(self):
  """Expects a `TypeError` when `evaluation_fn` is a dict, not a callable."""

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  run_kwargs = dict(
      iterative_process=_build_federated_averaging_process(),
      train_client_datasets_fn=sample_client_data,
      evaluation_fn={},  # Deliberately not callable.
      total_rounds=10,
      experiment_name='non_callable_evaluate',
      root_output_dir=self.get_temp_dir())

  with self.assertRaises(TypeError):
    training_loop.run(**run_kwargs)
def test_raises_non_callable_train_client_dataset(self):
  """Expects a `TypeError` when a dataset is passed instead of a sampling fn."""

  def empty_metrics(model, round_num):
    del model, round_num  # Unused.
    return {}

  run_kwargs = dict(
      iterative_process=_build_federated_averaging_process(),
      # Deliberately a dataset object rather than a callable.
      train_client_datasets_fn=_create_tf_dataset_for_client(3),
      evaluation_fn=empty_metrics,
      total_rounds=10,
      experiment_name='non_callable_client_dataset',
      root_output_dir=self.get_temp_dir())

  with self.assertRaises(TypeError):
    training_loop.run(**run_kwargs)
def test_raises_non_str_output_dir(self):
  """Expects a `TypeError` when `root_output_dir` is an int."""

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def empty_metrics(model, round_num):
    del model, round_num  # Unused.
    return {}

  run_kwargs = dict(
      iterative_process=_build_federated_averaging_process(),
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=empty_metrics,
      total_rounds=10,
      experiment_name='non_str_output_dir',
      root_output_dir=1)  # Deliberately not a string.

  with self.assertRaises(TypeError):
    training_loop.run(**run_kwargs)
def test_raises_non_iterative_process(self):
  """Expects a `TypeError` when `iterative_process` is a list."""

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def empty_metrics(model, round_num):
    del model, round_num  # Unused.
    return {}

  run_kwargs = dict(
      # A list wrapping the `next` computation instead of an iterative process.
      iterative_process=[_build_federated_averaging_process().next],
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=empty_metrics,
      total_rounds=10,
      experiment_name='non_iterative_process',
      root_output_dir=self.get_temp_dir())

  with self.assertRaises(TypeError):
    training_loop.run(**run_kwargs)
def test_checkpoint_manager_saves_state(self):
  """Checks that the checkpoint written by the loop matches the final state.

  After a one-round run, the latest checkpoint is loaded from the experiment's
  `checkpoints` directory. Its round number must be 0, and its model weights
  must give the same batch loss as the weights of the returned final state.
  """
  experiment_name = 'checkpoint_manager_saves_state'
  process = _build_federated_averaging_process()

  def sample_client_data(round_num):
    del round_num  # The same data is served in every round.
    return _federated_data()

  def empty_metrics(model, round_num):
    del model, round_num  # Unused.
    return {}

  output_root = self.get_temp_dir()
  final_state = training_loop.run(
      iterative_process=process,
      train_client_datasets_fn=sample_client_data,
      evaluation_fn=empty_metrics,
      total_rounds=1,
      experiment_name=experiment_name,
      root_output_dir=output_root)
  final_model = process.get_model_weights(final_state)

  checkpoint_dir = os.path.join(output_root, 'checkpoints', experiment_name)
  ckpt_manager = tff.simulation.FileCheckpointManager(checkpoint_dir)
  # `final_state` is passed as the argument of the checkpoint load.
  restored_state, restored_round = ckpt_manager.load_latest_checkpoint(
      final_state)
  self.assertEqual(restored_round, 0)

  keras_model = _compiled_keras_model_builder()
  restored_model = process.get_model_weights(restored_state)

  # Score the restored weights first, then the final weights, on one batch.
  restored_model.assign_weights_to(keras_model)
  first_batch = next(iter(_create_tf_dataset_for_client(5)))
  features, labels = first_batch['x'], first_batch['y']
  restored_loss = keras_model.test_on_batch(features, labels)

  final_model.assign_weights_to(keras_model)
  final_loss = keras_model.test_on_batch(features, labels)

  self.assertEqual(final_loss, restored_loss)
def run_federated(
    iterative_process_builder: Callable[..., tff.templates.IterativeProcess],
    client_epochs_per_round: int,
    client_batch_size: int,
    clients_per_round: int,
    max_elements_per_user: int,
    image_size: int,
    num_groups: int = 8,
    total_rounds: int = 3000,
    dataset_type: dataset.DatasetType = dataset.DatasetType.GLD23K,
    experiment_name: str = 'federated_gld23k',
    root_output_dir: str = '/tmp/fedopt_guide',
    dropout_prob: Optional[float] = None,
    client_datasets_random_seed: Optional[int] = None,
    **kwargs) -> None:
  """Runs an iterative process on the Google Landmark dataset.

  This method will load and pre-process dataset and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process that it applies to the task, using
  `federated_research/fedopt_guide/training_loop`.

  We assume that the iterative process has the following functional type
  signatures:

    *   `initialize`: `( -> S@SERVER)` where `S` represents the server state.
    *   `next`: `<S@SERVER, {B*}@CLIENTS> -> <S@SERVER, T@SERVER>` where `S`
        represents the server state, `{B*}` represents the client datasets,
        and `T` represents a python `Mapping` object.

  The iterative process must also have a callable attribute `get_model_weights`
  that takes as input the state of the iterative process, and returns a
  `tff.learning.ModelWeights` object.

  Args:
    iterative_process_builder: A function that accepts a no-arg `model_fn`, a
      `client_weight_fn` and returns a `tff.templates.IterativeProcess`. The
      `model_fn` must return a `tff.learning.Model`.
    client_epochs_per_round: An integer representing the number of epochs of
      training performed per client in each training round.
    client_batch_size: An integer representing the batch size used on clients.
    clients_per_round: An integer representing the number of clients
      participating in each round.
    max_elements_per_user: The maximum number of elements processed for each
      client's dataset. This has to be a positive value or -1 (which means that
      all elements are taken for training).
    image_size: The height and width of images after preprocessing.
    num_groups: The number of groups in the GroupNorm layers of MobilenetV2.
    total_rounds: The number of federated training rounds.
    dataset_type: A `dataset.DatasetType` specifying which dataset is used for
      experiments.
    experiment_name: The name of the experiment being run. This will be appended
      to the `root_output_dir` for purposes of writing outputs.
    root_output_dir: The name of the root output directory for writing
      experiment outputs.
    dropout_prob: Probability of setting a weight to zero in the dropout layer
      of MobilenetV2. Must be in the range [0, 1). Setting it to None (default)
      or zero means no dropout.
    client_datasets_random_seed: An optional int used to seed which clients are
      sampled at each round. If `None`, no seed is used.
    **kwargs: Additional arguments configuring the training loop. For details on
      supported arguments, see
      `federated_research/fedopt_guide/training_utils.py`.

  Raises:
    ValueError: If `dropout_prob` is nonzero and outside `[0, 1)`, or if the
      process returned by `iterative_process_builder` has no
      `get_model_weights` attribute.
  """
  # Reject a bad `dropout_prob` before any dataset is loaded.
  if dropout_prob and (dropout_prob < 0 or dropout_prob >= 1):
    raise ValueError(
        f'Expected a value in [0, 1) for `dropout_prob`, found {dropout_prob}.')

  num_classes, shuffle_buffer_size = dataset.get_dataset_stats(dataset_type)

  train_data, _ = tff.simulation.datasets.gldv2.load_data(
      gld23k=dataset_type == dataset.DatasetType.GLD23K)
  _, test_data = dataset.get_centralized_datasets(
      image_size=image_size,
      batch_size=client_batch_size,
      dataset_type=dataset_type)

  def model_builder() -> tf.keras.Model:
    return mobilenet_v2.create_mobilenet_v2(
        input_shape=(image_size, image_size, 3),
        num_groups=num_groups,
        num_classes=num_classes,
        dropout_prob=dropout_prob)

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]
  input_spec = test_data.element_spec

  def model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  training_process = iterative_process_builder(
      model_fn=model_fn, client_weight_fn=None)

  # Check the builder's contract before composing the process with the dataset
  # computation, so a misconfigured builder is reported as early as possible.
  if not hasattr(training_process, 'get_model_weights'):
    raise ValueError(
        'The `iterative_process_builder` must create an iterative process '
        'that has an attribute `get_model_weights`. It is a `tff.Computation` '
        'that accepts as input the state of an iterative process, and returns '
        'the model weights part from the state. If you use '
        '`tff.learning.build_federated_averaging_process`, it should already '
        'satisfy this requirement.')

  preprocessing_fn = dataset.get_preprocessing_fn(
      image_size=image_size,
      batch_size=client_batch_size,
      num_epochs=client_epochs_per_round,
      max_elements=max_elements_per_user,
      shuffle_buffer_size=shuffle_buffer_size)

  @tff.tf_computation(tf.string)
  def train_dataset_computation(client_id):
    client_train_data = train_data.dataset_computation(client_id)
    return preprocessing_fn(client_train_data)

  trainer = tff.simulation.compose_dataset_computation_with_iterative_process(
      dataset_computation=train_dataset_computation, process=training_process)
  # `compose_dataset_computation_with_iterative_process` does not inherit the
  # `get_model_weights` attribute from the `training_process`.
  trainer.get_model_weights = training_process.get_model_weights

  client_ids_fn = tff.simulation.build_uniform_sampling_fn(
      train_data.client_ids,
      size=clients_per_round,
      replace=False,
      random_seed=client_datasets_random_seed)
  # We convert the output to a list (instead of an np.ndarray) so that it can
  # be used as input to the iterative process.
  client_ids_fn_as_list = lambda x: list(client_ids_fn(x))

  evaluate_fn = training_utils.build_centralized_evaluate_fn(
      model_builder=model_builder,
      eval_dataset=test_data,
      loss_builder=loss_builder,
      metrics_builder=metrics_builder)

  logging.info('Training model:')
  # `summary()` prints and returns None, so passing its result to
  # `logging.info` would only log "None". Send the text to the logger through
  # `print_fn` instead; extra keyword arguments to the callback are ignored so
  # it stays compatible across Keras versions.
  model_builder().summary(
      print_fn=lambda text, **_: logging.info('%s', text))

  training_loop.run(
      iterative_process=trainer,
      train_client_datasets_fn=client_ids_fn_as_list,
      evaluation_fn=lambda model, _: evaluate_fn(model),
      test_fn=evaluate_fn,
      total_rounds=total_rounds,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      **kwargs)
def run_federated(
    iterative_process_builder: Callable[..., tff.templates.IterativeProcess],
    client_epochs_per_round: int,
    client_batch_size: int,
    clients_per_round: int,
    client_datasets_random_seed: Optional[int] = None,
    crop_size: Optional[int] = 24,
    total_rounds: Optional[int] = 1500,
    experiment_name: Optional[str] = 'federated_cifar10',
    root_output_dir: Optional[str] = '/tmp/fed_opt',
    uniform_weighting: Optional[bool] = False,
    **kwargs):
  """Runs an iterative process on the CIFAR-10 classification task.

  This method will load and pre-process dataset and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process that it applies to the task, using
  `federated_research.utils.training_loop`.

  We assume that the iterative process has the following functional type
  signatures:

    *   `initialize`: `( -> S@SERVER)` where `S` represents the server state.
    *   `next`: `<S@SERVER, {B*}@CLIENTS> -> <S@SERVER, T@SERVER>` where `S`
        represents the server state, `{B*}` represents the client datasets,
        and `T` represents a python `Mapping` object.

  The iterative process must also have a callable attribute `get_model_weights`
  that takes as input the state of the iterative process, and returns a
  `tff.learning.ModelWeights` object.

  Args:
    iterative_process_builder: A function that accepts, positionally, a no-arg
      `model_fn` and a client weighting (a `tff.learning.ClientWeighting`
      value), and returns a `tff.templates.IterativeProcess`. The `model_fn`
      must return a `tff.learning.Model`.
    client_epochs_per_round: An integer representing the number of epochs of
      training performed per client in each training round.
    client_batch_size: An integer representing the batch size used on clients.
    clients_per_round: An integer representing the number of clients
      participating in each round.
    client_datasets_random_seed: An optional int used to seed which clients are
      sampled at each round. If `None`, no seed is used.
    crop_size: An optional integer representing the resulting size of input
      images after preprocessing.
    total_rounds: The number of federated training rounds.
    experiment_name: The name of the experiment being run. This will be appended
      to the `root_output_dir` for purposes of writing outputs.
    root_output_dir: The name of the root output directory for writing
      experiment outputs.
    uniform_weighting: Whether to weigh clients uniformly. If false, clients are
      weighted by the number of samples.
    **kwargs: Additional arguments configuring the training loop. For details on
      supported arguments, see
      `federated_research/utils/training_utils.py`.
  """
  crop_shape = (crop_size, crop_size, 3)

  cifar_train, _ = cifar10_dataset.get_federated_datasets(
      train_client_epochs_per_round=client_epochs_per_round,
      train_client_batch_size=client_batch_size,
      crop_shape=crop_shape)
  _, cifar_test = cifar10_dataset.get_centralized_datasets(
      crop_shape=crop_shape)

  # The element spec is read from the first client's dataset.
  input_spec = cifar_train.create_tf_dataset_for_client(
      cifar_train.client_ids[0]).element_spec

  model_builder = functools.partial(
      resnet_models.create_resnet18,
      input_shape=crop_shape,
      num_classes=NUM_CLASSES)
  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  client_weight_fn = (
      tff.learning.ClientWeighting.UNIFORM
      if uniform_weighting else tff.learning.ClientWeighting.NUM_EXAMPLES)

  training_process = iterative_process_builder(tff_model_fn, client_weight_fn)

  client_datasets_fn = tff.simulation.build_uniform_client_sampling_fn(
      dataset=cifar_train,
      clients_per_round=clients_per_round,
      random_seed=client_datasets_random_seed)

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def validation_fn(model_weights, round_num):
    del round_num
    return evaluate_fn(model_weights, [cifar_test])

  def test_fn(model_weights):
    return evaluate_fn(model_weights, [cifar_test])

  logging.info('Training model:')
  # `summary()` prints and returns None, so passing its result to
  # `logging.info` would only log "None". Send the text to the logger through
  # `print_fn` instead; extra keyword arguments to the callback are ignored so
  # it stays compatible across Keras versions.
  model_builder().summary(
      print_fn=lambda text, **_: logging.info('%s', text))

  training_loop.run(
      iterative_process=training_process,
      train_client_datasets_fn=client_datasets_fn,
      evaluation_fn=validation_fn,
      test_fn=test_fn,
      total_rounds=total_rounds,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      **kwargs)
def run_federated(
    iterative_process_builder: Callable[..., tff.templates.IterativeProcess],
    client_epochs_per_round: int,
    client_batch_size: int,
    clients_per_round: int,
    max_elements_per_user: int,
    total_rounds: int = 3000,
    vocab_size: int = 10000,
    num_oov_buckets: int = 1,
    sequence_length: int = 20,
    num_validation_examples: int = 10000,
    dim_embed: int = 96,
    dim_model: int = 512,
    dim_hidden: int = 2048,
    num_heads: int = 8,
    num_layers: int = 1,
    max_position_encoding: int = 1000,
    dropout: float = 0.1,
    client_datasets_random_seed: Optional[int] = None,
    experiment_name: str = 'federated_stackoverflow',
    root_output_dir: str = '/tmp/fedopt_guide',
    max_val_test_batches: Optional[int] = None,
    **kwargs) -> None:
  """Configures training for Stack Overflow next-word prediction.

  This method will load and pre-process dataset and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process that it applies to the task, using
  `federated_research/fedopt_guide/training_loop`.

  We assume that the iterative process has the following functional type
  signatures:

    *   `initialize`: `( -> S@SERVER)` where `S` represents the server state.
    *   `next`: `<S@SERVER, {B*}@CLIENTS> -> <S@SERVER, T@SERVER>` where `S`
        represents the server state, `{B*}` represents the client datasets,
        and `T` represents a python `Mapping` object.

  The iterative process must also have a callable attribute `get_model_weights`
  that takes as input the state of the iterative process, and returns a
  `tff.learning.ModelWeights` object.

  Args:
    iterative_process_builder: A function that accepts a no-arg `model_fn`, a
      `client_weight_fn` and returns a `tff.templates.IterativeProcess`. The
      `model_fn` must return a `tff.learning.Model`.
    client_epochs_per_round: An integer representing the number of epochs of
      training performed per client in each training round.
    client_batch_size: An integer representing the batch size used on clients.
    clients_per_round: An integer representing the number of clients
      participating in each round.
    max_elements_per_user: The maximum number of elements processed for each
      client's dataset. This has to be a positive value or -1 (which means that
      all elements are taken for training).
    total_rounds: The number of federated training rounds.
    vocab_size: Integer dictating the number of most frequent words to use in
      the vocabulary.
    num_oov_buckets: The number of out-of-vocabulary buckets to use.
    sequence_length: The maximum number of words to take for each sequence.
    num_validation_examples: The number of test examples to use for validation.
    dim_embed: An integer for the dimension of the token embeddings.
    dim_model: An integer for the dimension of features of MultiHeadAttention
      layers.
    dim_hidden: An integer for the dimension of hidden layers of the FFN.
    num_heads: An integer for the number of attention heads.
    num_layers: An integer for the number of Transformer blocks.
    max_position_encoding: Maximum number of positions for position embeddings.
    dropout: Dropout rate.
    client_datasets_random_seed: An optional int used to seed which clients are
      sampled at each round. If `None`, no seed is used.
    experiment_name: The name of the experiment being run. This will be appended
      to the `root_output_dir` for purposes of writing outputs.
    root_output_dir: The name of the root output directory for writing
      experiment outputs.
    max_val_test_batches: If set to a positive integer, val and test datasets
      are capped to at most that many batches. If set to None or a nonpositive
      integer, the full datasets are used.
    **kwargs: Additional arguments configuring the training loop. For details on
      supported arguments, see
      `federated_research/fedopt_guide/training_utils.py`.
  """
  train_clientdata, _, _ = tff.simulation.datasets.stackoverflow.load_data()

  _, validation_dataset, test_dataset = (
      stackoverflow_word_prediction.get_centralized_datasets(
          vocab_size=vocab_size,
          max_sequence_length=sequence_length,
          num_validation_examples=num_validation_examples,
          num_oov_buckets=num_oov_buckets))

  if max_val_test_batches and max_val_test_batches >= 1:
    validation_dataset = validation_dataset.take(max_val_test_batches)
    test_dataset = test_dataset.take(max_val_test_batches)

  model_builder = functools.partial(
      transformer_models.create_transformer_lm,
      vocab_size=vocab_size,
      num_oov_buckets=num_oov_buckets,
      dim_embed=dim_embed,
      dim_model=dim_model,
      dim_hidden=dim_hidden,
      num_heads=num_heads,
      num_layers=num_layers,
      max_position_encoding=max_position_encoding,
      dropout=dropout,
      name='stackoverflow-transformer')

  loss_builder = functools.partial(
      tf.keras.losses.SparseCategoricalCrossentropy, from_logits=True)

  special_tokens = stackoverflow_word_prediction.get_special_tokens(
      vocab_size, num_oov_buckets)
  pad_token = special_tokens.pad
  oov_tokens = special_tokens.oov
  eos_token = special_tokens.eos

  def metrics_builder():
    return [
        keras_metrics.MaskedCategoricalAccuracy(
            name='accuracy_with_oov', masked_tokens=[pad_token]),
        keras_metrics.MaskedCategoricalAccuracy(
            name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens),
        # Notice BOS never appears in ground truth.
        keras_metrics.MaskedCategoricalAccuracy(
            name='accuracy_no_oov_or_eos',
            masked_tokens=[pad_token, eos_token] + oov_tokens),
        keras_metrics.NumBatchesCounter(),
        keras_metrics.NumTokensCounter(masked_tokens=[pad_token])
    ]

  train_dataset_preprocess_comp = (
      stackoverflow_word_prediction.create_preprocess_fn(
          vocab=stackoverflow_word_prediction.create_vocab(vocab_size),
          num_oov_buckets=num_oov_buckets,
          client_batch_size=client_batch_size,
          client_epochs_per_round=client_epochs_per_round,
          max_sequence_length=sequence_length,
          max_elements_per_client=max_elements_per_user))

  input_spec = train_dataset_preprocess_comp.type_signature.result.element

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  def client_weight_fn(local_outputs):
    # Num_tokens is a tensor with type int64[1], to use as a weight need
    # a float32 scalar.
    return tf.cast(tf.squeeze(local_outputs['num_tokens']), tf.float32)

  iterative_process = iterative_process_builder(
      tff_model_fn, client_weight_fn=client_weight_fn)

  compose = tff.simulation.compose_dataset_computation_with_iterative_process
  if hasattr(train_clientdata, 'dataset_computation'):
    # The client data exposes a dataset computation, so the composed process
    # takes client ids and builds each dataset from its id.

    @tff.tf_computation(tf.string)
    def train_dataset_computation(client_id):
      client_train_data = train_clientdata.dataset_computation(client_id)
      return train_dataset_preprocess_comp(client_train_data)

    training_process = compose(train_dataset_computation, iterative_process)
    client_ids_fn = tff.simulation.build_uniform_sampling_fn(
        train_clientdata.client_ids,
        size=clients_per_round,
        replace=False,
        random_seed=client_datasets_random_seed)
    # We convert the output to a list (instead of an np.ndarray) so that it can
    # be used as input to the iterative process.
    client_sampling_fn = lambda x: list(client_ids_fn(x))
  else:
    # Otherwise sample client datasets directly and compose the process with
    # the preprocessing computation alone.
    training_process = compose(train_dataset_preprocess_comp,
                               iterative_process)
    client_sampling_fn = tff.simulation.build_uniform_client_sampling_fn(
        dataset=train_clientdata,
        clients_per_round=clients_per_round,
        random_seed=client_datasets_random_seed)

  # The composed process is given the original process's `get_model_weights`
  # explicitly.
  training_process.get_model_weights = iterative_process.get_model_weights

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def validation_fn(model_weights, round_num):
    del round_num
    return evaluate_fn(model_weights, [validation_dataset])

  def test_fn(model_weights):
    # The final test pass covers the validation and test splits together.
    return evaluate_fn(model_weights,
                       [validation_dataset.concatenate(test_dataset)])

  logging.info('Training model:')
  # `summary()` prints and returns None, so passing its result to
  # `logging.info` would only log "None". Send the text to the logger through
  # `print_fn` instead; extra keyword arguments to the callback are ignored so
  # it stays compatible across Keras versions.
  model_builder().summary(
      print_fn=lambda text, **_: logging.info('%s', text))

  training_loop.run(
      iterative_process=training_process,
      train_client_datasets_fn=client_sampling_fn,
      evaluation_fn=validation_fn,
      test_fn=test_fn,
      total_rounds=total_rounds,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      **kwargs)