  def test_preprocess_element_spec(self):
    ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    preprocess_fn = emnist_dataset.create_preprocess_fn(
        num_epochs=1,
        batch_size=20,
        shuffle_buffer_size=1,
        emnist_task='autoencoder')
    preprocessed_ds = preprocess_fn(ds)
    self.assertEqual(preprocessed_ds.element_spec,
                     (tf.TensorSpec(shape=(None, 784), dtype=tf.float32),
                      tf.TensorSpec(shape=(None, 784), dtype=tf.float32)))

  def test_preprocess_returns_correct_element(self):
    ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    preprocess_fn = emnist_dataset.create_preprocess_fn(
        num_epochs=1,
        batch_size=20,
        shuffle_buffer_size=1,
        emnist_task='digit_recognition')
    preprocessed_ds = preprocess_fn(ds)

    element = next(iter(preprocessed_ds))
    expected_element = (tf.zeros(shape=(1, 28, 28, 1), dtype=tf.float32),
                        tf.zeros(shape=(1,), dtype=tf.int32))
    self.assertAllClose(self.evaluate(element), expected_element)

  def test_preprocess_returns_correct_element_for_autoencoder(self):
    ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    preprocess_fn = emnist_dataset.create_preprocess_fn(
        num_epochs=1,
        batch_size=20,
        shuffle_buffer_size=1,
        emnist_task='autoencoder')
    preprocessed_ds = preprocess_fn(ds)
    self.assertEqual(preprocessed_ds.element_spec,
                     (tf.TensorSpec(shape=(None, 784), dtype=tf.float32),
                      tf.TensorSpec(shape=(None, 784), dtype=tf.float32)))

    element = next(iter(preprocessed_ds))
    expected_element = (tf.ones(shape=(1, 784), dtype=tf.float32),
                        tf.ones(shape=(1, 784), dtype=tf.float32))
    self.assertAllClose(self.evaluate(element), expected_element)
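
# Note: these tests reference a module-level `TEST_DATA` constant that is not
# shown in this snippet. A minimal sketch consistent with the assertions above
# (a single all-zeros example; the autoencoder pipeline then yields all ones,
# presumably because it inverts pixel intensities) could look like this; the
# real constant may differ.
import collections

import numpy as np

TEST_DATA = collections.OrderedDict(
    label=[np.int32(0)],
    pixels=[np.zeros((28, 28), dtype=np.float32)],
)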
Example 4
def configure_training(
    task_spec: training_specs.TaskSpec) -> training_specs.RunnerSpec:
  """Configures training for the EMNIST autoencoder task.

  This method will load and pre-process datasets and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process compatible with `federated_research.utils.training_loop`.

  Args:
    task_spec: A `TaskSpec` class for creating federated training tasks.

  Returns:
    A `RunnerSpec` containing attributes used for running the newly created
    federated task.
  """

  emnist_task = 'autoencoder'
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(only_digits=False)
  _, emnist_test = emnist_dataset.get_centralized_datasets(
      only_digits=False, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)

  input_spec = train_preprocess_fn.type_signature.result.element

  model_builder = emnist_ae_models.create_autoencoder_model
  loss_builder = functools.partial(
      tf.keras.losses.MeanSquaredError, reduction=tf.keras.losses.Reduction.SUM)
  metrics_builder = lambda: [tf.keras.metrics.MeanSquaredError()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  if hasattr(emnist_train, 'dataset_computation'):

    @tff.tf_computation(tf.string)
    def build_train_dataset_from_client_id(client_id):
      client_dataset = emnist_train.dataset_computation(client_id)
      return train_preprocess_fn(client_dataset)

    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        build_train_dataset_from_client_id, iterative_process)
    client_ids_fn = training_utils.build_sample_fn(
        emnist_train.client_ids,
        size=task_spec.clients_per_round,
        replace=False,
        random_seed=task_spec.sampling_random_seed)
    # We convert the output to a list (instead of an np.ndarray) so that it can
    # be used as input to the iterative process.
    client_sampling_fn = lambda x: list(client_ids_fn(x))

  else:
    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        train_preprocess_fn, iterative_process)
    client_sampling_fn = training_utils.build_client_datasets_fn(
        dataset=emnist_train,
        clients_per_round=task_spec.clients_per_round,
        random_seed=task_spec.sampling_random_seed)

  training_process.get_model_weights = iterative_process.get_model_weights

  test_fn = training_utils.build_centralized_evaluate_fn(
      eval_dataset=emnist_test,
      model_builder=model_builder,
      loss_builder=loss_builder,
      metrics_builder=metrics_builder)

  validation_fn = lambda model_weights, round_num: test_fn(model_weights)

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)
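
# Usage sketch (hypothetical): the `TaskSpec` fields below are inferred from
# the attribute accesses in `configure_training` above, not from a documented
# constructor signature.
def iterative_process_builder(model_fn):
  return tff.learning.build_federated_averaging_process(
      model_fn=model_fn,
      client_optimizer_fn=tf.keras.optimizers.SGD,
      server_optimizer_fn=tf.keras.optimizers.SGD)

task_spec = training_specs.TaskSpec(
    iterative_process_builder=iterative_process_builder,
    client_epochs_per_round=1,
    client_batch_size=20,
    clients_per_round=10,
    sampling_random_seed=42)
runner_spec = configure_training(task_spec)
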
Example 5
def configure_training(task_spec: training_specs.TaskSpec,
                       model: str = 'cnn') -> training_specs.RunnerSpec:
  """Configures training for the EMNIST character recognition task.

  This method will load and pre-process datasets and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process compatible with `federated_research.utils.training_loop`.

  Args:
    task_spec: A `TaskSpec` class for creating federated training tasks.
    model: A string specifying the model used for character recognition. Can be
      either `cnn` or `2nn`, corresponding to a CNN model and a densely
      connected two-layer model, respectively.

  Returns:
    A `RunnerSpec` containing attributes used for running the newly created
    federated task.
  """
  emnist_task = 'digit_recognition'
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(only_digits=False)
  _, emnist_test = emnist_dataset.get_centralized_datasets(
      only_digits=False, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)

  input_spec = train_preprocess_fn.type_signature.result.element

  if model == 'cnn':
    model_builder = functools.partial(
        emnist_models.create_conv_dropout_model, only_digits=False)
  elif model == '2nn':
    model_builder = functools.partial(
        emnist_models.create_two_hidden_layer_model, only_digits=False)
  else:
    raise ValueError(
        'Cannot handle model flag [{!s}], must be one of {!s}.'.format(
            model, EMNIST_MODELS))

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  @tff.tf_computation(tf.string)
  def build_train_dataset_from_client_id(client_id):
    client_dataset = emnist_train.dataset_computation(client_id)
    return train_preprocess_fn(client_dataset)

  training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
      build_train_dataset_from_client_id, iterative_process)
  client_ids_fn = tff.simulation.build_uniform_sampling_fn(
      emnist_train.client_ids,
      size=task_spec.clients_per_round,
      replace=False,
      random_seed=task_spec.client_datasets_random_seed)
  # We convert the output to a list (instead of an np.ndarray) so that it can
  # be used as input to the iterative process.
  client_sampling_fn = lambda x: list(client_ids_fn(x))

  training_process.get_model_weights = iterative_process.get_model_weights

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def test_fn(state):
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  def validation_fn(state, round_num):
    del round_num
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)
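
# Usage sketch (hypothetical): wiring the resulting `RunnerSpec` into the
# training loop mentioned in the docstring. The `training_loop.run` keyword
# arguments mirror the call in the `main` example below; the concrete values
# are placeholders.
spec = configure_training(task_spec, model='2nn')
training_loop.run(
    iterative_process=spec.iterative_process,
    client_datasets_fn=spec.client_datasets_fn,
    validation_fn=spec.validation_fn,
    test_fn=spec.test_fn,
    total_rounds=100,
    experiment_name='emnist_character_recognition',
    root_output_dir='/tmp/emnist')
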
Example 6
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Expected no command-line arguments, '
                         'got: {}'.format(argv))

  emnist_task = 'digit_recognition'
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(only_digits=False)
  _, emnist_test = emnist_dataset.get_centralized_datasets(
      only_digits=False, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=FLAGS.client_epochs_per_round,
      batch_size=FLAGS.client_batch_size,
      emnist_task=emnist_task)

  input_spec = train_preprocess_fn.type_signature.result.element

  if FLAGS.model == 'cnn':
    model_builder = functools.partial(
        emnist_models.create_conv_dropout_model, only_digits=FLAGS.only_digits)
  elif FLAGS.model == '2nn':
    model_builder = functools.partial(
        emnist_models.create_two_hidden_layer_model,
        only_digits=FLAGS.only_digits)
  elif FLAGS.model == '1m_cnn':
    model_builder = functools.partial(
        create_1m_cnn_model, only_digits=FLAGS.only_digits)
  else:
    raise ValueError('Cannot handle model flag [{!s}].'.format(FLAGS.model))

  logging.info('Training model:')
  # `summary()` returns None, so logging its return value would only log
  # "None"; route the summary text through the logger instead.
  model_builder().summary(print_fn=logging.info)

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

  compression_dict = utils_impl.lookup_flag_values(compression_flags)
  dp_dict = utils_impl.lookup_flag_values(dp_flags)

  # Most logic for deciding what baseline to run is here.
  aggregation_factory = fl_utils.build_aggregator(
      compression_flags=compression_dict,
      dp_flags=dp_dict,
      num_clients=len(emnist_train.client_ids),
      num_clients_per_round=FLAGS.clients_per_round,
      num_rounds=FLAGS.total_rounds,
      client_template=model_builder().trainable_variables)

  def tff_model_fn():
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        loss=loss_builder(),
        input_spec=input_spec,
        metrics=metrics_builder())

  server_optimizer_fn = lambda: utils_impl.create_optimizer_from_flags('server')
  client_optimizer_fn = lambda: utils_impl.create_optimizer_from_flags('client')

  iterative_process = tff.learning.build_federated_averaging_process(
      model_fn=tff_model_fn,
      server_optimizer_fn=server_optimizer_fn,
      client_weighting=tff.learning.ClientWeighting.UNIFORM,
      client_optimizer_fn=client_optimizer_fn,
      model_update_aggregation_factory=aggregation_factory)

  @tff.tf_computation(tf.string)
  def build_train_dataset_from_client_id(client_id):
    client_dataset = emnist_train.dataset_computation(client_id)
    return train_preprocess_fn(client_dataset)

  training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
      build_train_dataset_from_client_id, iterative_process)
  training_process.get_model_weights = iterative_process.get_model_weights

  client_ids_fn = functools.partial(
      tff.simulation.build_uniform_sampling_fn(
          emnist_train.client_ids,
          replace=False,
          random_seed=FLAGS.client_datasets_random_seed),
      size=FLAGS.clients_per_round)

  # We convert the output to a list (instead of an np.ndarray) so that it can
  # be used as input to the iterative process.
  client_sampling_fn = lambda x: list(client_ids_fn(x))

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def test_fn(state):
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  def validation_fn(state, round_num):
    del round_num
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  training_loop.run(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn,
      total_rounds=FLAGS.total_rounds,
      experiment_name=FLAGS.experiment_name,
      root_output_dir=FLAGS.root_output_dir,
      rounds_per_eval=FLAGS.rounds_per_eval,
      rounds_per_checkpoint=FLAGS.rounds_per_checkpoint)
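
# The full script presumably ends with the standard absl entry point (not
# shown in the original snippet):
if __name__ == '__main__':
  app.run(main)
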
Example 7
def configure_training(task_spec: training_specs.TaskSpec,
                       eval_spec: Optional[training_specs.EvalSpec] = None,
                       model: str = 'cnn') -> training_specs.RunnerSpec:
  """Configures training for the EMNIST character recognition task.

  This method will load and pre-process datasets and construct a model used for
  the task. It then uses `iterative_process_builder` to create an iterative
  process compatible with `federated_research.utils.training_loop`.

  Args:
    task_spec: A `TaskSpec` class for creating federated training tasks.
    eval_spec: An `EvalSpec` class for configuring federated evaluation. If set
      to None, centralized evaluation is used for validation and testing
      instead.
    model: A string specifying the model used for character recognition. Can be
      either `cnn` or `2nn`, corresponding to a CNN model and a densely
      connected two-layer model, respectively.

  Returns:
    A `RunnerSpec` containing attributes used for running the newly created
    federated task.
  """
  emnist_task = 'digit_recognition'

  emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data(
      only_digits=False)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)

  input_spec = train_preprocess_fn.type_signature.result.element

  if model == 'cnn':
    model_builder = functools.partial(
        emnist_models.create_conv_dropout_model, only_digits=False)
  elif model == '2nn':
    model_builder = functools.partial(
        emnist_models.create_two_hidden_layer_model, only_digits=False)
  else:
    raise ValueError(
        'Cannot handle model flag [{!s}], must be one of {!s}.'.format(
            model, EMNIST_MODELS))

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  clients_per_train_round = min(task_spec.clients_per_round,
                                TOTAL_NUM_TRAIN_CLIENTS)

  if hasattr(emnist_train, 'dataset_computation'):

    @tff.tf_computation(tf.string)
    def build_train_dataset_from_client_id(client_id):
      client_dataset = emnist_train.dataset_computation(client_id)
      return train_preprocess_fn(client_dataset)

    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        build_train_dataset_from_client_id, iterative_process)
    client_ids_fn = training_utils.build_sample_fn(
        emnist_train.client_ids,
        size=clients_per_train_round,
        replace=False,
        random_seed=task_spec.sampling_random_seed)
    # We convert the output to a list (instead of an np.ndarray) so that it can
    # be used as input to the iterative process.
    client_sampling_fn = lambda x: list(client_ids_fn(x))

  else:
    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        train_preprocess_fn, iterative_process)
    client_sampling_fn = training_utils.build_client_datasets_fn(
        dataset=emnist_train,
        clients_per_round=clients_per_train_round,
        random_seed=task_spec.sampling_random_seed)

  training_process.get_model_weights = iterative_process.get_model_weights

  if eval_spec:

    if eval_spec.clients_per_validation_round is None:
      clients_per_validation_round = TOTAL_NUM_TEST_CLIENTS
    else:
      clients_per_validation_round = min(eval_spec.clients_per_validation_round,
                                         TOTAL_NUM_TEST_CLIENTS)

    if eval_spec.clients_per_test_round is None:
      clients_per_test_round = TOTAL_NUM_TEST_CLIENTS
    else:
      clients_per_test_round = min(eval_spec.clients_per_test_round,
                                   TOTAL_NUM_TEST_CLIENTS)

    test_preprocess_fn = emnist_dataset.create_preprocess_fn(
        num_epochs=1,
        batch_size=eval_spec.client_batch_size,
        shuffle_buffer_size=1,
        emnist_task=emnist_task)
    emnist_test = emnist_test.preprocess(test_preprocess_fn)

    def eval_metrics_builder():
      return [
          tf.keras.metrics.SparseCategoricalCrossentropy(),
          tf.keras.metrics.SparseCategoricalAccuracy()
      ]

    federated_eval_fn = training_utils.build_federated_evaluate_fn(
        model_builder=model_builder, metrics_builder=eval_metrics_builder)

    validation_client_sampling_fn = training_utils.build_client_datasets_fn(
        emnist_test,
        clients_per_validation_round,
        random_seed=eval_spec.sampling_random_seed)
    test_client_sampling_fn = training_utils.build_client_datasets_fn(
        emnist_test,
        clients_per_test_round,
        random_seed=eval_spec.sampling_random_seed)

    def validation_fn(model_weights, round_num):
      validation_clients = validation_client_sampling_fn(round_num)
      return federated_eval_fn(model_weights, validation_clients)

    def test_fn(model_weights):
      # We fix the round number to get deterministic sampling behavior.
      test_round_num = 0
      test_clients = test_client_sampling_fn(test_round_num)
      return federated_eval_fn(model_weights, test_clients)

  else:
    _, central_emnist_test = emnist_dataset.get_centralized_datasets(
        only_digits=False, emnist_task=emnist_task)

    test_fn = training_utils.build_centralized_evaluate_fn(
        eval_dataset=central_emnist_test,
        model_builder=model_builder,
        loss_builder=loss_builder,
        metrics_builder=metrics_builder)

    validation_fn = lambda model_weights, round_num: test_fn(model_weights)

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)
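
# Usage sketch (hypothetical): `EvalSpec` fields are inferred from the
# attribute accesses in the `eval_spec` branch above. Passing `eval_spec=None`
# instead selects centralized evaluation, as noted in the docstring.
eval_spec = training_specs.EvalSpec(
    client_batch_size=32,
    clients_per_validation_round=10,
    clients_per_test_round=None,  # None means all TOTAL_NUM_TEST_CLIENTS.
    sampling_random_seed=7)
runner_spec = configure_training(task_spec, eval_spec=eval_spec, model='cnn')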