def test_weighted_update_state_multiple_masked_tokens_and_masked_zero(self):
    """Checks weighted accuracy with tokens 3, 4 and the zero token masked."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=[3, 4], mask_zero=True)

    labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    predictions = [
        # A batch with 25% accuracy, but 50% accuracy after masking and 66%
        # accuracy after reweighting.
        [
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
            [0.1, 0.1, 0.1, 0.9, 0.1],
        ],
        # A batch with 0% accuracy, all masked.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    # One weight per `y_true` scalar.
    weights = [[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]]

    metric.update_state(
        y_true=labels, y_pred=predictions, sample_weight=weights)

    # Unmasked tokens carry weights 1.0 (wrong) and 2.0 (right): 2 / 3.
    expected_accuracy = 2.0 / 3.0
    self.assertAllClose(self.evaluate(metric.result()), expected_accuracy)
def test_update_state_with_special_character(self):
    """Checks unweighted accuracy across two updates with token 4 masked."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=4)

    first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    first_predictions = [
        # A batch with 100% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # A batch with 50% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    metric.update_state(y_true=first_labels, y_pred=first_predictions)
    # Seven unmasked tokens so far, five of them counted as correct.
    self.assertAllClose(self.evaluate(metric.result()), 5 / 7.0)

    second_labels = [[0, 4, 1, 2]]
    second_predictions = [
        # A batch with 33% accuracy.
        [
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    metric.update_state(y_true=second_labels, y_pred=second_predictions)
    # The metric accumulates across updates: six correct out of ten.
    self.assertAllClose(self.evaluate(metric.result()), 6 / 10.0)
def test_weighted_update_state_special_character_rank_2_sample_weight(self):
    """Checks weighted accuracy with token 4 masked and rank-2 weights."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=4)

    labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    predictions = [
        # A batch with 100% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # A batch with 50% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    # A weight for each `y_true` scalar, shaped like `y_true`.
    weights = [[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]]

    metric.update_state(
        y_true=labels, y_pred=predictions, sample_weight=weights)

    # Correct weight: 4 from the first sequence (the masked token's weight of
    # 2 is excluded) plus 4 from the second; total unmasked weight is 10.
    expected_accuracy = (4 + 4) / 10.0
    self.assertAllClose(self.evaluate(metric.result()), expected_accuracy)
def test_run_simple_model(self):
    """Runs the metric inside a compiled recurrent model on fixed inputs."""
    vocab_size = 6
    model = models.create_recurrent_model(vocab_size, sequence_length=5)
    accuracy_metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size)
    model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=[accuracy_metric])

    batch_inputs = tf.constant(
        [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], dtype=tf.int64)
    batch_targets = tf.constant(
        [[2, 3, 4, 5, 0], [2, 3, 4, 5, 0]], dtype=tf.int64)
    metrics = model.test_on_batch(x=batch_inputs, y=batch_targets)
    expected_batch_metrics = [
        8.886,  # loss
        0.2,  # accuracy
    ]
    self.assertAllClose(metrics, expected_batch_metrics, atol=1e-3)

    # `tf.data.Dataset.from_tensor_slices` aggressively coalesces the input
    # into a single tensor, but we want a tuple of two tensors per example,
    # so each element is split back into its two rows.
    examples = [
        ([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]),
        ([2, 3, 4, 0, 1], [3, 4, 0, 1, 2]),
    ]
    data = tf.data.Dataset.from_tensor_slices(examples)
    data = data.map(lambda t: (t[0, :], t[1, :]))
    data = data.batch(2)

    metrics = model.evaluate(data)
    self.assertAllClose(metrics, [5.085, 0.125], atol=1e-3)
def test_constructor_with_masked_token(self):
    """Checks construction when a single masked token is supplied."""
    expected_name = 'my_test_metric'
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, name=expected_name, masked_tokens=100)

    self.assertIsInstance(metric, tf.keras.metrics.Metric)
    self.assertEqual(metric.name, expected_name)
    # The scalar masked token is reported in the config as a list.
    config = metric.get_config()
    self.assertAllEqual(config['masked_tokens'], [100])
    # No state has been accumulated yet, so the result is zero.
    initial_result = self.evaluate(metric.result())
    self.assertEqual(initial_result, 0.0)
def test_constructor_no_masked_token(self):
    """Checks construction when no masked token is supplied."""
    expected_name = 'my_test_metric'
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, name=expected_name)

    self.assertIsInstance(metric, tf.keras.metrics.Metric)
    self.assertEqual(metric.name, expected_name)
    # The vocabulary size must round-trip through the config.
    self.assertEqual(metric.get_config()['vocab_size'], 5)
    # No state has been accumulated yet, so the result is zero.
    initial_result = self.evaluate(metric.result())
    self.assertEqual(initial_result, 0.0)
def metrics_builder():
    """Returns a fresh `list` of `tf.keras.metrics.Metric` objects.

    The list holds, in order: a batch counter, an example counter, a token
    counter named 'num_tokens', and a flattened categorical accuracy over
    `VOCAB_SIZE` classes. The last two are built with `mask_zero=True`.
    """
    batch_counter = keras_metrics.NumBatchesCounter()
    example_counter = keras_metrics.NumExamplesCounter()
    token_counter = keras_metrics.FlattenedNumExamplesCounter(
        name='num_tokens', mask_zero=True)
    accuracy = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=VOCAB_SIZE, mask_zero=True)
    return [batch_counter, example_counter, token_counter, accuracy]
def metrics_builder():
    """Returns a fresh list of accuracy and counter metrics.

    Three accuracy metrics are built over the same extended vocabulary; they
    differ only in which special tokens are masked. `pad_token`, `oov_token`
    and `eos_token` are taken from the enclosing scope.
    """
    # Plus 4 for PAD, OOV, BOS and EOS.
    extended_vocab_size = FLAGS.vocab_size + 4

    accuracy_with_oov = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=extended_vocab_size,
        name='accuracy_with_oov',
        masked_tokens=pad_token)
    accuracy_no_oov = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=extended_vocab_size,
        name='accuracy_no_oov',
        masked_tokens=[pad_token, oov_token])
    # Notice BOS never appears in ground truth.
    accuracy_no_oov_or_eos = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=extended_vocab_size,
        name='accuracy_no_oov_or_eos',
        masked_tokens=[pad_token, oov_token, eos_token])
    batch_counter = keras_metrics.NumBatchesCounter()
    token_counter = keras_metrics.FlattenedNumExamplesCounter(
        name='num_tokens', mask_zero=True)

    return [
        accuracy_with_oov,
        accuracy_no_oov,
        accuracy_no_oov_or_eos,
        batch_counter,
        token_counter,
    ]
def test_weighted_update_state_with_scalar_weight(self):
    """Checks that a scalar `sample_weight` is accepted by `update_state`."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5)

    labels = [[1, 2, 3, 4]]
    predictions = [
        # A batch with 50% accuracy.
        [
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    metric.update_state(
        y_true=labels, y_pred=predictions, sample_weight=1.0)

    self.assertAllClose(self.evaluate(metric.result()), 0.5)
def test_update_state_with_all_tokens_masked(self):
    """Checks the result is zero when every label is a masked token."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=[1, 2, 3, 4])

    # Every label below is one of the masked tokens, so all batches should
    # be masked.
    labels = [[1, 2, 3, 4], [4, 3, 2, 1]]
    predictions = [
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    metric.update_state(y_true=labels, y_pred=predictions)

    self.assertAllClose(self.evaluate(metric.result()), 0.0)
def test_update_state_with_multiple_tokens_masked(self):
    """Checks accuracy when tokens 1 through 4 are all masked."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=[1, 2, 3, 4])

    labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    predictions = [
        # This batch should be masked.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # Batch with 50% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    metric.update_state(y_true=labels, y_pred=predictions)

    # Only the second, unmasked sequence contributes to the result.
    self.assertAllClose(self.evaluate(metric.result()), 0.5)
def test_weighted_update_state_masked_zero(self):
    """Checks accuracy with `mask_zero=True` (no `sample_weight` is passed)."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, mask_zero=True)

    labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    predictions = [
        # A batch with 100% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # A batch with 0% accuracy, all masked.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    metric.update_state(y_true=labels, y_pred=predictions)

    # The all-zero label sequence is masked out, leaving only correct tokens.
    self.assertAllClose(self.evaluate(metric.result()), 1.0)
def test_weighted_update_state_with_masked_token(self):
    """Checks weighted accuracy over two updates with token 4 masked."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(
        vocab_size=5, masked_tokens=4)

    first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    first_predictions = [
        # A batch with 100% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # A batch with 50% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    # A weight for each `y_true` scalar, given as a flat (rank-1) list.
    first_weights = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    metric.update_state(
        y_true=first_labels,
        y_pred=first_predictions,
        sample_weight=first_weights)
    # Correct weight 4 + 4 out of an unmasked total weight of 10.
    self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)

    second_labels = [[0, 4, 1, 2]]
    second_predictions = [
        # One of the three unmasked tokens is predicted correctly; it
        # carries weight 1 out of an unmasked total weight of 5.
        [
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    second_weights = [1.0, 1.0, 2.0, 2.0]
    metric.update_state(
        y_true=second_labels,
        y_pred=second_predictions,
        sample_weight=second_weights)
    # State accumulates across updates.
    self.assertAllClose(
        self.evaluate(metric.result()), (4 + 4 + 1) / 15.0)
def test_weighted_update_state_no_special_character(self):
    """Checks weighted accuracy over two updates with no masked tokens."""
    metric = keras_metrics.FlattenedCategoricalAccuracy(vocab_size=5)

    first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
    first_predictions = [
        # A batch with 100% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.9, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
        # A batch with 50% accuracy.
        [
            [0.1, 0.9, 0.1, 0.1, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.9, 0.1, 0.1, 0.1, 0.0],
        ],
    ]
    # A weight for each `y_true` scalar, given as a flat (rank-1) list.
    first_weights = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    metric.update_state(
        y_true=first_labels,
        y_pred=first_predictions,
        sample_weight=first_weights)
    # Correct weight 6 + 4 out of a total weight of 12.
    self.assertAllClose(self.evaluate(metric.result()), (6 + 4) / 12.0)

    second_labels = [[0, 4, 1, 2]]
    second_predictions = [
        # Two of the four tokens are predicted correctly; they carry
        # weight 2 out of a total weight of 6.
        [
            [0.9, 0.1, 0.1, 0.1, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
            [0.1, 0.1, 0.1, 0.9, 0.1],
            [0.1, 0.1, 0.1, 0.1, 0.9],
        ],
    ]
    second_weights = [1.0, 1.0, 2.0, 2.0]
    metric.update_state(
        y_true=second_labels,
        y_pred=second_predictions,
        sample_weight=second_weights)
    # State accumulates across updates.
    self.assertAllClose(
        self.evaluate(metric.result()), (6 + 4 + 2) / 18.0)
def main(argv):
    """Trains a recurrent model on the Shakespeare data and logs metrics.

    Creates the output directories under `FLAGS.root_output_dir`, writes the
    hyperparameter flags to `hparams.csv`, trains the model on the data of
    all training clients pooled together, validates on the pooled test
    clients after each epoch, and logs the final validation metrics.

    Args:
      argv: Positional command-line arguments; only the program name is
        accepted.

    Raises:
      app.UsageError: If more than one positional argument is supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    tf.compat.v1.enable_v2_behavior()

    experiment_output_dir = FLAGS.root_output_dir
    tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                   FLAGS.experiment_name)
    results_dir = os.path.join(experiment_output_dir, 'results',
                               FLAGS.experiment_name)

    for path in [experiment_output_dir, tensorboard_dir, results_dir]:
        try:
            tf.io.gfile.makedirs(path)
        except tf.errors.OpError as e:
            # Creation stays best-effort, but the failure is reported rather
            # than swallowed so later write errors can be traced back here.
            logging.warning('Could not create directory [%s]: %s', path, e)

    hparam_dict = collections.OrderedDict([
        (name, FLAGS[name].value) for name in hparam_flags
    ])
    hparam_dict['results_file'] = results_dir
    hparams_file = os.path.join(results_dir, 'hparams.csv')
    logging.info('Saving hyper parameters to: [%s]', hparams_file)
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

    train_client_data, test_client_data = (
        tff.simulation.datasets.shakespeare.load_data())

    def preprocess(ds):
        # A single pass over the data, cached after preprocessing.
        return dataset.convert_snippets_to_character_sequence_examples(
            ds, FLAGS.batch_size, epochs=1).cache()

    train_dataset = train_client_data.create_tf_dataset_from_all_clients()
    if FLAGS.shuffle_train_data:
        train_dataset = train_dataset.shuffle(buffer_size=10000)
    train_dataset = preprocess(train_dataset)

    eval_dataset = preprocess(
        test_client_data.create_tf_dataset_from_all_clients())

    optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

    # Vocabulary with one OOV ID and zero for the mask.
    vocab_size = len(dataset.CHAR_VOCAB) + 2
    model = models.create_recurrent_model(
        vocab_size=vocab_size, batch_size=FLAGS.batch_size)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=vocab_size, mask_zero=True)
        ])

    logging.info('Training model:')
    # `model.summary()` returns None, so logging its return value would log
    # the text 'None'. Capture the summary lines through `print_fn` instead;
    # any extra keyword arguments passed to the callback are ignored.
    summary_lines = []
    model.summary(
        print_fn=lambda line, **unused_kwargs: summary_lines.append(line))
    logging.info('%s', '\n'.join(summary_lines))

    csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tensorboard_dir)

    # Reduce the learning rate every 20 epochs.
    def decay_lr(epoch, lr):
        # `epoch` is offset by one so the decay lands on epochs 19, 39, ...
        if (epoch + 1) % 20 == 0:
            return lr * 0.1
        return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

    history = model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=FLAGS.num_epochs,
        callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

    logging.info('Final metrics:')
    for name in ['loss', 'accuracy']:
        # Report the validation value recorded for the last epoch.
        metric = history.history['val_{}'.format(name)][-1]
        logging.info('\t%s: %.4f', name, metric)
def run_experiment():
    """Runs the training experiment.

    Builds the word-level validation/test datasets and the centralized
    training dataset, compiles an LSTM model with three accuracy metrics
    that differ in which special tokens are masked, writes the hyperparameter
    flags to `hparams.csv`, trains the model, and logs the final test loss
    and the first compiled metric.
    """
    (_, stackoverflow_validation,
     stackoverflow_test) = dataset.construct_word_level_datasets(
         FLAGS.vocab_size, FLAGS.batch_size, 1, FLAGS.sequence_length, -1,
         FLAGS.num_validation_examples)
    centralized_train = dataset.get_centralized_train_dataset(
        FLAGS.vocab_size, FLAGS.batch_size, FLAGS.sequence_length,
        FLAGS.shuffle_buffer_size)

    def _lstm_fn(latent_size):
        return tf.keras.layers.LSTM(latent_size, return_sequences=True)

    model = models.create_recurrent_model(
        FLAGS.vocab_size,
        _lstm_fn,
        'stackoverflow-lstm',
        shared_embedding=FLAGS.shared_embedding)

    # `model.summary()` returns None, so logging its return value would log
    # the text 'None'. Capture the summary lines through `print_fn` instead;
    # any extra keyword arguments passed to the callback are ignored.
    summary_lines = []
    model.summary(
        print_fn=lambda line, **unused_kwargs: summary_lines.append(line))
    logging.info('Training model: %s', '\n'.join(summary_lines))

    optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()
    pad_token, oov_token, _, eos_token = dataset.get_special_tokens(
        FLAGS.vocab_size)
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=optimizer,
        metrics=[
            # Plus 4 for pad, oov, bos, eos
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_with_oov',
                masked_tokens=pad_token),
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_no_oov',
                masked_tokens=[pad_token, oov_token]),
            keras_metrics.FlattenedCategoricalAccuracy(
                vocab_size=FLAGS.vocab_size + 4,
                name='accuracy_no_oov_or_eos',
                masked_tokens=[pad_token, oov_token, eos_token]),
        ])

    train_results_path = os.path.join(FLAGS.root_output_dir, 'train_results',
                                      FLAGS.experiment_name)
    test_results_path = os.path.join(FLAGS.root_output_dir, 'test_results',
                                     FLAGS.experiment_name)

    train_csv_logger = keras_callbacks.AtomicCSVLogger(train_results_path)
    test_csv_logger = keras_callbacks.AtomicCSVLogger(test_results_path)

    log_dir = os.path.join(FLAGS.root_output_dir, 'logdir',
                           FLAGS.experiment_name)
    # Directory that will hold `hparams.csv`; it is distinct from the three
    # directories above, so it is created here as well.
    hparams_dir = os.path.join(FLAGS.root_output_dir, FLAGS.experiment_name)

    # Each directory is created independently so that a failure on one does
    # not prevent the others from being created.
    for path in [log_dir, train_results_path, test_results_path, hparams_dir]:
        try:
            tf.io.gfile.makedirs(path)
        except tf.errors.OpError as e:
            # Creation stays best-effort, but the failure is reported rather
            # than swallowed so later write errors can be traced back here.
            logging.warning('Could not create directory [%s]: %s', path, e)

    train_tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir,
        write_graph=True,
        update_freq=FLAGS.tensorboard_update_frequency)

    test_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

    # Write the hyperparameters to a CSV:
    hparam_dict = collections.OrderedDict([
        (name, FLAGS[name].value) for name in hparam_flags
    ])
    hparams_file = os.path.join(hparams_dir, 'hparams.csv')
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

    model.fit(
        centralized_train,
        epochs=FLAGS.epochs,
        verbose=0,
        validation_data=stackoverflow_validation,
        callbacks=[train_csv_logger, train_tensorboard_callback])
    score = model.evaluate(
        stackoverflow_test,
        verbose=0,
        callbacks=[test_csv_logger, test_tensorboard_callback])
    # `score[0]` is the loss; `score[1]` is the first compiled metric
    # ('accuracy_with_oov').
    logging.info('Final test loss: %.4f', score[0])
    logging.info('Final test accuracy: %.4f', score[1])