예제 #1
0
 def test_non_supported_task_raises(self):
     """Checks that an unrecognized `emnist_task` raises a `ValueError`."""
     spec = client_spec.ClientSpec(num_epochs=1, batch_size=1)
     expected_message = (
         'emnist_task must be one of "character_recognition" or "autoencoder".'
     )
     with self.assertRaisesRegex(ValueError, expected_message):
         emnist_preprocessing.create_preprocess_fn(spec,
                                                   emnist_task='bad_task')
예제 #2
0
 def test_non_supported_task_raises(self):
     """Checks that an unrecognized `emnist_task` raises a `ValueError`."""
     expected_message = (
         'emnist_task must be one of "digit_recognition" or "autoencoder".')
     # All arguments are valid except for the task name.
     preprocess_kwargs = dict(num_epochs=1,
                              batch_size=1,
                              shuffle_buffer_size=1,
                              emnist_task='bad_task')
     with self.assertRaisesRegex(ValueError, expected_message):
         emnist_preprocessing.create_preprocess_fn(**preprocess_kwargs)
예제 #3
0
def create_character_recognition_task_from_datasets(
    train_client_spec: client_spec.ClientSpec,
    eval_client_spec: Optional[client_spec.ClientSpec],
    model_id: Union[str, CharacterRecognitionModel], only_digits: bool,
    train_data: client_data.ClientData,
    test_data: client_data.ClientData) -> baseline_task.BaselineTask:
  """Creates a baseline task for character recognition on EMNIST.

  Args:
    train_client_spec: A `tff.simulation.baselines.ClientSpec` specifying how to
      preprocess train client data.
    eval_client_spec: An optional `tff.simulation.baselines.ClientSpec`
      specifying how to preprocess evaluation client data. If set to `None`, a
      default spec with `num_epochs=1`, `batch_size=64` and
      `shuffle_buffer_size=1` is used.
    model_id: A string identifier (or `CharacterRecognitionModel`) for a
      character recognition model. Must be one of 'cnn_dropout', 'cnn', or
      '2nn'. These correspond respectively to a CNN model with dropout, a CNN
      model with no dropout, and a densely connected network with two hidden
      layers of width 200.
    only_digits: A boolean indicating whether to use the smaller EMNIST-10
      dataset with only 10 numeric classes (`True`) or the full EMNIST-62
      dataset containing 62 alphanumeric classes (`False`). The flag is
      forwarded to the model constructor; `train_data` and `test_data` are
      used as given.
    train_data: A `tff.simulation.datasets.ClientData` used for training.
    test_data: A `tff.simulation.datasets.ClientData` used for testing.

  Returns:
    A `tff.simulation.baselines.BaselineTask`.
  """
  emnist_task = 'character_recognition'

  if eval_client_spec is None:
    eval_client_spec = client_spec.ClientSpec(
        num_epochs=1, batch_size=64, shuffle_buffer_size=1)

  train_preprocess_fn = emnist_preprocessing.create_preprocess_fn(
      train_client_spec, emnist_task=emnist_task)
  eval_preprocess_fn = emnist_preprocessing.create_preprocess_fn(
      eval_client_spec, emnist_task=emnist_task)

  task_datasets = task_data.BaselineTaskDatasets(
      train_data=train_data,
      test_data=test_data,
      validation_data=None,
      train_preprocess_fn=train_preprocess_fn,
      eval_preprocess_fn=eval_preprocess_fn)

  def model_fn() -> model.Model:
    # A fresh Keras model is built on every call; the input spec is taken from
    # the preprocessed task datasets.
    return keras_utils.from_keras_model(
        keras_model=_get_character_recognition_model(model_id, only_digits),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        input_spec=task_datasets.element_type_structure,
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  return baseline_task.BaselineTask(task_datasets, model_fn)
예제 #4
0
def create_autoencoder_task_from_datasets(
        train_client_spec: client_spec.ClientSpec,
        eval_client_spec: Optional[client_spec.ClientSpec],
        train_data: client_data.ClientData,
        test_data: client_data.ClientData) -> baseline_task.BaselineTask:
    """Builds the EMNIST autoencoder baseline task from the given client data.

    Args:
      train_client_spec: A `tff.simulation.baselines.ClientSpec` describing the
        preprocessing applied to train client data.
      eval_client_spec: An optional `tff.simulation.baselines.ClientSpec`
        describing the preprocessing applied to evaluation client data. When
        `None`, a spec with `num_epochs=1`, `batch_size=64` and
        `shuffle_buffer_size=1` is used instead.
      train_data: A `tff.simulation.datasets.ClientData` used for training.
      test_data: A `tff.simulation.datasets.ClientData` used for testing.

    Returns:
      A `tff.simulation.baselines.BaselineTask`.
    """
    if eval_client_spec is None:
        eval_client_spec = client_spec.ClientSpec(
            num_epochs=1, batch_size=64, shuffle_buffer_size=1)

    def _preprocess_fn_for(spec):
        # Both splits share the same task; only the client spec differs.
        return emnist_preprocessing.create_preprocess_fn(
            spec, emnist_task='autoencoder')

    datasets = task_data.BaselineTaskDatasets(
        train_data=train_data,
        test_data=test_data,
        validation_data=None,
        train_preprocess_fn=_preprocess_fn_for(train_client_spec),
        eval_preprocess_fn=_preprocess_fn_for(eval_client_spec))

    def model_fn() -> model.Model:
        autoencoder = emnist_models.create_autoencoder_model()
        reconstruction_loss = tf.keras.losses.MeanSquaredError()
        element_spec = datasets.element_type_structure
        reconstruction_metrics = [
            tf.keras.metrics.MeanSquaredError(),
            tf.keras.metrics.MeanAbsoluteError(),
        ]
        return keras_utils.from_keras_model(
            keras_model=autoencoder,
            loss=reconstruction_loss,
            input_spec=element_spec,
            metrics=reconstruction_metrics)

    return baseline_task.BaselineTask(datasets, model_fn)
예제 #5
0
 def test_ds_length_with_max_elements(self, max_elements):
     """Checks the preprocessed length is `min(repeat_size, max_elements)`."""
     num_repeats = 10
     source_ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
     repeated_ds = source_ds.repeat(num_repeats)
     spec = client_spec.ClientSpec(
         num_epochs=1, batch_size=1, max_elements=max_elements)
     apply_preprocessing = emnist_preprocessing.create_preprocess_fn(spec)

     actual_length = _compute_length_of_dataset(
         apply_preprocessing(repeated_ds))
     expected_length = min(num_repeats, max_elements)
     self.assertEqual(actual_length, expected_length)
예제 #6
0
 def test_ds_length_is_ceil_num_epochs_over_batch_size(
         self, num_epochs, batch_size):
     """Checks the preprocessed length equals `ceil(num_epochs / batch_size)`."""
     source_ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
     spec = client_spec.ClientSpec(
         num_epochs=num_epochs, batch_size=batch_size)
     apply_preprocessing = emnist_preprocessing.create_preprocess_fn(spec)

     actual_length = _compute_length_of_dataset(
         apply_preprocessing(source_ds))
     expected_length = tf.cast(
         tf.math.ceil(num_epochs / batch_size), tf.int32)
     self.assertEqual(actual_length, expected_length)
예제 #7
0
    def test_autoencoder_preprocess_returns_correct_elements(self):
        """Checks element spec and first batch of autoencoder preprocessing."""
        source_ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
        spec = client_spec.ClientSpec(
            num_epochs=1, batch_size=20, shuffle_buffer_size=1)
        apply_preprocessing = emnist_preprocessing.create_preprocess_fn(
            spec, emnist_task='autoencoder')
        batched_ds = apply_preprocessing(source_ds)

        # Input and reconstruction target are expected to share one spec.
        flat_image_spec = tf.TensorSpec(shape=(None, 784), dtype=tf.float32)
        self.assertEqual(batched_ds.element_spec,
                         (flat_image_spec, flat_image_spec))

        first_batch = next(iter(batched_ds))
        expected_inputs = tf.ones(shape=(1, 784), dtype=tf.float32)
        expected_targets = tf.ones(shape=(1, 784), dtype=tf.float32)
        self.assertAllClose(self.evaluate(first_batch),
                            (expected_inputs, expected_targets))
예제 #8
0
    def test_digit_recognition_preprocess_returns_correct_elements(self):
        """Checks element spec and first batch of digit-recognition preprocessing."""
        source_ds = tf.data.Dataset.from_tensor_slices(TEST_DATA)
        apply_preprocessing = emnist_preprocessing.create_preprocess_fn(
            num_epochs=1,
            batch_size=20,
            shuffle_buffer_size=1,
            emnist_task='digit_recognition')
        batched_ds = apply_preprocessing(source_ds)

        image_spec = tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)
        label_spec = tf.TensorSpec(shape=(None, ), dtype=tf.int32)
        self.assertEqual(batched_ds.element_spec, (image_spec, label_spec))

        first_batch = next(iter(batched_ds))
        expected_images = tf.zeros(shape=(1, 28, 28, 1), dtype=tf.float32)
        expected_labels = tf.zeros(shape=(1, ), dtype=tf.int32)
        self.assertAllClose(self.evaluate(first_batch),
                            (expected_images, expected_labels))
예제 #9
0
 def test_preprocess_fn_with_negative_epochs_raises(self):
     """Checks that a negative `num_epochs` raises a `ValueError`."""
     expected_message = 'num_epochs must be a positive integer'
     with self.assertRaisesRegex(ValueError, expected_message):
         emnist_preprocessing.create_preprocess_fn(
             num_epochs=-2, batch_size=1, shuffle_buffer_size=1)