def test_weighted_update_state_multiple_masked_tokens_and_masked_zero(
         self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(
         vocab_size=5, masked_tokens=[3, 4], mask_zero=True)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
              # A batch with 25% accuracy: 50% after masking, and 2/3 after
              # reweighting.
             [
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
             ],
             # A batch with 0% accuracy, all masked.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ],
         sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
     self.assertAllClose(self.evaluate(metric.result()), 1. * 2 / 3)
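To make the 2/3 above concrete, here is a minimal NumPy sketch of the same masked, weighted accuracy arithmetic (the helper name is illustrative, not part of keras_metrics):

import numpy as np

def masked_weighted_accuracy(y_true, y_pred, sample_weight, masked_tokens):
    # Flatten labels, argmax-decoded predictions, and weights to one row per token.
    labels = np.reshape(y_true, [-1])
    preds = np.argmax(np.reshape(y_pred, (-1, np.shape(y_pred)[-1])), axis=-1)
    weights = np.asarray(sample_weight, dtype=float).reshape(-1)
    # Masked tokens (including 0 when mask_zero=True) contribute zero weight.
    weights = weights * ~np.isin(labels, masked_tokens)
    return np.sum((labels == preds) * weights) / np.sum(weights)

# With the inputs above and masked_tokens=[0, 3, 4], only tokens 1 and 2
# survive (weights 1.0 and 2.0); token 2 is predicted correctly, giving 2/3.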
 def test_update_state_with_special_character(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         masked_tokens=4)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
              # A batch with 100% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
              # A batch with 50% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ])
     self.assertAllClose(self.evaluate(metric.result()), 5 / 7.0)
     metric.update_state(
         y_true=[[0, 4, 1, 2]],
         y_pred=[
              # A batch with 33% accuracy after masking token 4.
             [
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ])
     self.assertAllClose(self.evaluate(metric.result()), 6 / 10.0)
 def test_weighted_update_state_special_character_rank_2_sample_weight(
         self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         masked_tokens=4)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
             # A batch with 100% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             # A batch with 50% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ],
         # A weight for each `y_true` scalar.
         sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
      self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)
Example 4
    def test_run_simple_model(self):
        vocab_size = 6
        model = models.create_recurrent_model(vocab_size, sequence_length=5)
        model.compile(
            optimizer='sgd',
            loss='sparse_categorical_crossentropy',
            metrics=[keras_metrics.FlattenedCategoricalAccuracy(vocab_size)])

        metrics = model.test_on_batch(
            x=tf.constant([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], dtype=tf.int64),
            y=tf.constant([[2, 3, 4, 5, 0], [2, 3, 4, 5, 0]], dtype=tf.int64))
        self.assertAllClose(
            metrics,
            [
                8.886,  # loss
                0.2,  # accuracy
            ],
            atol=1e-3)

        # `tf.data.Dataset.from_tensor_slices` aggressively coalesces the input into
        # a single tensor, but we want a tuple of two tensors per example, so we
        # apply a transformation to split.
        def split_to_tuple(t):
            return (t[0, :], t[1, :])

        data = tf.data.Dataset.from_tensor_slices([
            ([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]),
            ([2, 3, 4, 0, 1], [3, 4, 0, 1, 2]),
        ]).map(split_to_tuple).batch(2)
        metrics = model.evaluate(data)
        self.assertAllClose(metrics, [5.085, 0.125], atol=1e-3)
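The coalescing that motivates split_to_tuple is easy to observe on its own; a short sketch:

import tensorflow as tf

pairs = [([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]),
         ([2, 3, 4, 0, 1], [3, 4, 0, 1, 2])]
ds = tf.data.Dataset.from_tensor_slices(pairs)
print(ds.element_spec)     # TensorSpec(shape=(2, 5), ...): one stacked tensor per example.
split = ds.map(lambda t: (t[0, :], t[1, :]))
print(split.element_spec)  # (TensorSpec(shape=(5,)), TensorSpec(shape=(5,))): an (x, y) tuple.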
 def test_constructor_with_masked_token(self):
     metric_name = 'my_test_metric'
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         name=metric_name,
                                                         masked_tokens=100)
     self.assertIsInstance(metric, tf.keras.metrics.Metric)
     self.assertEqual(metric.name, metric_name)
     self.assertAllEqual(metric.get_config()['masked_tokens'], [100])
     self.assertEqual(self.evaluate(metric.result()), 0.0)
 def test_constructor_no_masked_token(self):
     metric_name = 'my_test_metric'
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         name=metric_name)
     self.assertIsInstance(metric, tf.keras.metrics.Metric)
     self.assertEqual(metric.name, metric_name)
     metric_config = metric.get_config()
     self.assertEqual(metric_config['vocab_size'], 5)
     self.assertEqual(self.evaluate(metric.result()), 0.0)
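Because get_config() round-trips vocab_size and masked_tokens, the metric can be rebuilt from its config. A quick sketch, assuming the standard tf.keras Metric.from_config classmethod is not overridden:

metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5, masked_tokens=[3, 4])
rebuilt = keras_metrics.FlattenedCategoricalAccuracy.from_config(metric.get_config())
assert rebuilt.get_config() == metric.get_config()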
Example 7
def metrics_builder():
    """Returns a `list` of `tf.keras.metric.Metric` objects."""
    return [
        keras_metrics.NumBatchesCounter(),
        keras_metrics.NumExamplesCounter(),
        keras_metrics.FlattenedNumExamplesCounter(name='num_tokens',
                                                  mask_zero=True),
        keras_metrics.FlattenedCategoricalAccuracy(vocab_size=VOCAB_SIZE,
                                                   mask_zero=True),
    ]
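A builder like this is typically handed to TFF's Keras wrapper so each client gets fresh metric instances. A hedged sketch, assuming the tff.learning.from_keras_model API of this era (create_keras_model and input_spec are hypothetical placeholders):

def model_fn():
    keras_model = create_keras_model()  # hypothetical model constructor
    return tff.learning.from_keras_model(
        keras_model,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        input_spec=input_spec,  # element spec of the client tf.data.Datasets
        metrics=metrics_builder())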
Example 8
 def metrics_builder():
     return [
         keras_metrics.FlattenedCategoricalAccuracy(
             # Plus 4 for PAD, OOV, BOS and EOS.
             vocab_size=FLAGS.vocab_size + 4,
             name='accuracy_with_oov',
             masked_tokens=pad_token),
         keras_metrics.FlattenedCategoricalAccuracy(
             vocab_size=FLAGS.vocab_size + 4,
             name='accuracy_no_oov',
             masked_tokens=[pad_token, oov_token]),
         # Notice BOS never appears in ground truth.
         keras_metrics.FlattenedCategoricalAccuracy(
             vocab_size=FLAGS.vocab_size + 4,
             name='accuracy_no_oov_or_eos',
             masked_tokens=[pad_token, oov_token, eos_token]),
         keras_metrics.NumBatchesCounter(),
         keras_metrics.FlattenedNumExamplesCounter(name='num_tokens',
                                                   mask_zero=True),
     ]
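For reference, the special-token ids used above follow this codebase's StackOverflow convention; a sketch of the assumed layout (verify against dataset.get_special_tokens before relying on it):

def get_special_tokens_sketch(vocab_size):
    # Assumed layout: 0 is the padding id, and the three extra ids sit just
    # past the real vocabulary, for vocab_size + 4 ids overall.
    pad = 0
    oov = vocab_size + 1
    bos = vocab_size + 2
    eos = vocab_size + 3
    return pad, oov, bos, eos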
 def test_weighted_update_state_with_scalar_weight(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5)
     metric.update_state(
         y_true=[[1, 2, 3, 4]],
         y_pred=[
             # A batch with 50% accuracy.
             [
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ],
         sample_weight=1.0)
     self.assertAllClose(self.evaluate(metric.result()), .5)
 def test_update_state_with_all_tokens_masked(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(
         vocab_size=5, masked_tokens=[1, 2, 3, 4])
     metric.update_state(
          # All tokens should be masked.
         y_true=[[1, 2, 3, 4], [4, 3, 2, 1]],
         y_pred=[
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ])
     self.assertAllClose(self.evaluate(metric.result()), 0.0)
 def test_update_state_with_multiple_tokens_masked(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(
         vocab_size=5, masked_tokens=[1, 2, 3, 4])
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
             [
                 # This batch should be masked.
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             [
                  # A batch with 50% accuracy.
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ])
     self.assertAllClose(self.evaluate(metric.result()), 0.5)
 def test_weighted_update_state_masked_zero(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         mask_zero=True)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
             # A batch with 100% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             # A batch with 0% accuracy, all masked.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ])
     self.assertAllClose(self.evaluate(metric.result()), 1.)
 def test_weighted_update_state_with_masked_token(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5,
                                                         masked_tokens=4)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
             # A batch with 100% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             # A batch with 50% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ],
         # A weight for each `y_true` scalar.
         sample_weight=[1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
     self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)
     metric.update_state(
         y_true=[[0, 4, 1, 2]],
         y_pred=[
              # A batch with 33% accuracy after masking token 4.
             [
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ],
         sample_weight=[1.0, 1.0, 2.0, 2.0])
     self.assertAllClose(self.evaluate(metric.result()), (4 + 4 + 1) / 15.0)
 def test_weighted_update_state_no_special_character(self):
     metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5)
     metric.update_state(
         y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
         y_pred=[
             # A batch with 100% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.9, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
             # A batch with 50% accuracy.
             [
                 [0.1, 0.9, 0.1, 0.1, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.9, 0.1, 0.1, 0.1, 0.0],
             ],
         ],
         # A weight for each `y_true` scalar.
         sample_weight=[1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
     self.assertAllClose(self.evaluate(metric.result()), (6 + 4) / 12.0)
     metric.update_state(
         y_true=[[0, 4, 1, 2]],
         y_pred=[
              # A batch with 50% accuracy (2/6 weighted).
             [
                 [0.9, 0.1, 0.1, 0.1, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
                 [0.1, 0.1, 0.1, 0.9, 0.1],
                 [0.1, 0.1, 0.1, 0.1, 0.9],
             ],
         ],
         sample_weight=[1.0, 1.0, 2.0, 2.0])
     self.assertAllClose(self.evaluate(metric.result()), (6 + 4 + 2) / 18.0)
Example 15
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    tf.compat.v1.enable_v2_behavior()

    experiment_output_dir = FLAGS.root_output_dir
    tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                   FLAGS.experiment_name)
    results_dir = os.path.join(experiment_output_dir, 'results',
                               FLAGS.experiment_name)

    for path in [experiment_output_dir, tensorboard_dir, results_dir]:
        try:
            tf.io.gfile.makedirs(path)
        except tf.errors.OpError:
            pass  # Directory already exists.

    hparam_dict = collections.OrderedDict([(name, FLAGS[name].value)
                                           for name in hparam_flags])
    hparam_dict['results_file'] = results_dir
    hparams_file = os.path.join(results_dir, 'hparams.csv')
    logging.info('Saving hyperparameters to: [%s]', hparams_file)
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

    train_client_data, test_client_data = (
        tff.simulation.datasets.shakespeare.load_data())

    def preprocess(ds):
        return dataset.convert_snippets_to_character_sequence_examples(
            ds, FLAGS.batch_size, epochs=1).cache()

    train_dataset = train_client_data.create_tf_dataset_from_all_clients()
    if FLAGS.shuffle_train_data:
        train_dataset = train_dataset.shuffle(buffer_size=10000)
    train_dataset = preprocess(train_dataset)

    eval_dataset = preprocess(
        test_client_data.create_tf_dataset_from_all_clients())

    optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

    # Vocabulary with one OOV ID and zero for the mask.
    vocab_size = len(dataset.CHAR_VOCAB) + 2
    model = models.create_recurrent_model(vocab_size=vocab_size,
                                          batch_size=FLAGS.batch_size)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            keras_metrics.FlattenedCategoricalAccuracy(vocab_size=vocab_size,
                                                       mask_zero=True)
        ])

    logging.info('Training model:')
    model.summary(print_fn=logging.info)

    csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tensorboard_dir)

    # Reduce the learning rate every 20 epochs.
    def decay_lr(epoch, lr):
        if (epoch + 1) % 20 == 0:
            return lr * 0.1
        else:
            return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)
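    # Worked example, assuming an initial learning rate of 0.1: epochs 0-18 run
    # at 0.1, the scheduler fires at the start of epoch 19 ((19 + 1) % 20 == 0)
    # so epochs 19-38 run at 0.01, epochs 39-58 at 0.001, and so on.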

    history = model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=FLAGS.num_epochs,
        callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

    logging.info('Final metrics:')
    for name in ['loss', 'accuracy']:
        metric = history.history['val_{}'.format(name)][-1]
        logging.info('\t%s: %.4f', name, metric)
Example 16
def run_experiment():
    """Runs the training experiment."""
    (_, stackoverflow_validation,
     stackoverflow_test) = dataset.construct_word_level_datasets(
         FLAGS.vocab_size, FLAGS.batch_size, 1, FLAGS.sequence_length, -1,
         FLAGS.num_validation_examples)
    centralized_train = dataset.get_centralized_train_dataset(
        FLAGS.vocab_size, FLAGS.batch_size, FLAGS.sequence_length,
        FLAGS.shuffle_buffer_size)

    def _lstm_fn(latent_size):
        return tf.keras.layers.LSTM(latent_size, return_sequences=True)

    model = models.create_recurrent_model(
        FLAGS.vocab_size,
        _lstm_fn,
        'stackoverflow-lstm',
        shared_embedding=FLAGS.shared_embedding)
    logging.info('Training model:')
    model.summary(print_fn=logging.info)
    optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()
    pad_token, oov_token, _, eos_token = dataset.get_special_tokens(
        FLAGS.vocab_size)
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=optimizer,
        metrics=[
            # Plus 4 for pad, oov, bos, eos
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_with_oov',
                masked_tokens=pad_token),
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_no_oov',
                masked_tokens=[pad_token, oov_token]),
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_no_oov_or_eos',
                masked_tokens=[pad_token, oov_token, eos_token]),
        ])

    train_results_path = os.path.join(FLAGS.root_output_dir, 'train_results',
                                      FLAGS.experiment_name)
    test_results_path = os.path.join(FLAGS.root_output_dir, 'test_results',
                                     FLAGS.experiment_name)

    train_csv_logger = keras_callbacks.AtomicCSVLogger(train_results_path)
    test_csv_logger = keras_callbacks.AtomicCSVLogger(test_results_path)

    log_dir = os.path.join(FLAGS.root_output_dir, 'logdir',
                           FLAGS.experiment_name)
    try:
        tf.io.gfile.makedirs(log_dir)
        tf.io.gfile.makedirs(train_results_path)
        tf.io.gfile.makedirs(test_results_path)
    except tf.errors.OpError:
        pass  # Directories already exist.

    train_tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir,
        write_graph=True,
        update_freq=FLAGS.tensorboard_update_frequency)

    test_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

    # Write the hyperparameters to a CSV:
    hparam_dict = collections.OrderedDict([(name, FLAGS[name].value)
                                           for name in hparam_flags])
    hparams_file = os.path.join(FLAGS.root_output_dir, FLAGS.experiment_name,
                                'hparams.csv')
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

    model.fit(centralized_train,
              epochs=FLAGS.epochs,
              verbose=0,
              validation_data=stackoverflow_validation,
              callbacks=[train_csv_logger, train_tensorboard_callback])
    score = model.evaluate(
        stackoverflow_test,
        verbose=0,
        callbacks=[test_csv_logger, test_tensorboard_callback])
    logging.info('Final test loss: %.4f', score[0])
    logging.info('Final test accuracy: %.4f', score[1])