def metrics_builder():
  """Builds the evaluation metrics for next-word prediction.

  `pad_token`, `oov_tokens` and `eos_token` are not defined in this function;
  they must be supplied by the surrounding scope.

  Returns:
    A list holding three `MaskedCategoricalAccuracy` metrics (masking
    progressively more special tokens), a `NumBatchesCounter` and a
    `NumTokensCounter`.
  """
  metrics = []
  # Accuracy over everything except padding.
  metrics.append(
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_with_oov', masked_tokens=[pad_token]))
  # Accuracy that additionally ignores out-of-vocabulary tokens.
  metrics.append(
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens))
  # Accuracy that additionally ignores end-of-sentence tokens.
  # Notice BOS never appears in ground truth.
  metrics.append(
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov_or_eos',
          masked_tokens=[pad_token, eos_token] + oov_tokens))
  metrics.append(keras_metrics.NumBatchesCounter())
  metrics.append(keras_metrics.NumTokensCounter(masked_tokens=[pad_token]))
  return metrics
def test_update_state_with_special_character(self):
  """Checks accumulated accuracy over two updates when token 4 is masked."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])

  first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
  first_scores = [
      # A batch with 100% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      # A batch with 50% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  accuracy.update_state(y_true=first_labels, y_pred=first_scores)
  # One of the eight labels is the masked token: 5 correct out of 7.
  self.assertAllClose(self.evaluate(accuracy.result()), 5 / 7.0)

  second_labels = [[0, 4, 1, 2]]
  second_scores = [
      # A batch with 33% accuracy once the masked label is dropped.
      [
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
  ]
  accuracy.update_state(y_true=second_labels, y_pred=second_scores)
  # Running totals: 5 + 1 correct out of 7 + 3 unmasked labels.
  self.assertAllClose(self.evaluate(accuracy.result()), 6 / 10.0)
def test_constructor_no_masked_token(self):
  """Checks name, type, config and initial value of a default-built metric."""
  expected_name = 'my_test_metric'
  accuracy = keras_metrics.MaskedCategoricalAccuracy(name=expected_name)

  self.assertIsInstance(accuracy, tf.keras.metrics.Metric)
  self.assertEqual(accuracy.name, expected_name)

  # With no `masked_tokens` argument the config should report an empty list.
  config = accuracy.get_config()
  self.assertAllEqual(config['masked_tokens'], [])

  # No state has been accumulated yet.
  initial_value = self.evaluate(accuracy.result())
  self.assertEqual(initial_value, 0.0)
def test_update_state_with_no_special_character(self):
  """Checks accumulated accuracy over two updates with no masked tokens."""
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  # Compare with a tolerance, like the second check below and the sibling
  # tests: the metric result is a floating-point value.
  self.assertAllClose(self.evaluate(metric.result()), 6 / 8.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 50% accuracy: the first two predictions match their
          # labels and the last two do not.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ])
  # Running totals: 6 + 2 correct out of 8 + 4 labels.
  self.assertAllClose(self.evaluate(metric.result()), 8 / 12.0)
def metrics_builder():
  """Builds the metrics tracked for the Shakespeare task.

  Returns:
    A `list` of metric objects: batch, example and token counters followed by
    a masked categorical accuracy. The token counter and the accuracy both
    mask the padding token reported by
    `shakespeare_dataset.get_special_tokens()`.
  """
  # Only the first of the four special tokens (padding) is needed here.
  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  batch_counter = keras_metrics.NumBatchesCounter()
  example_counter = keras_metrics.NumExamplesCounter()
  token_counter = keras_metrics.NumTokensCounter(masked_tokens=[pad_token])
  masked_accuracy = keras_metrics.MaskedCategoricalAccuracy(
      masked_tokens=[pad_token])
  return [batch_counter, example_counter, token_counter, masked_accuracy]
def test_weighted_update_state_with_scalar_weight(self):
  """Checks that a scalar `sample_weight` of 1.0 leaves accuracy unchanged."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy()

  labels = [[1, 2, 3, 4]]
  scores = [
      # A batch with 50% accuracy.
      [
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
  ]
  accuracy.update_state(y_true=labels, y_pred=scores, sample_weight=1.0)

  observed = self.evaluate(accuracy.result())
  self.assertAllClose(observed, .5)
def test_update_state_with_all_tokens_masked(self):
  """Checks the result is 0.0 when every label is a masked token."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy(
      masked_tokens=[1, 2, 3, 4])

  # Every label below appears in `masked_tokens`, so all batches should be
  # masked.
  labels = [[1, 2, 3, 4], [4, 3, 2, 1]]
  scores = [
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  accuracy.update_state(y_true=labels, y_pred=scores)

  observed = self.evaluate(accuracy.result())
  self.assertAllClose(observed, 0.0)
def test_update_state_with_multiple_tokens_masked(self):
  """Checks accuracy when one sequence is fully masked and one is not."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy(
      masked_tokens=[1, 2, 3, 4])

  labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
  scores = [
      # This batch should be masked: all of its labels are masked tokens.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      # Batch with 50% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  accuracy.update_state(y_true=labels, y_pred=scores)

  observed = self.evaluate(accuracy.result())
  self.assertAllClose(observed, 0.5)
def test_weighted_update_state_special_character_rank_2_sample_weight(self):
  """Checks weighted accuracy with token 4 masked and rank-2 weights."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])

  labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
  scores = [
      # A batch with 100% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      # A batch with 50% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  # A weight for each `y_true` scalar, laid out with the same shape as
  # `labels`.
  weights = [[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]]
  accuracy.update_state(y_true=labels, y_pred=scores, sample_weight=weights)

  observed = self.evaluate(accuracy.result())
  self.assertAllClose(observed, (6 + 2) / 10.0)
def test_weighted_update_state_no_special_character(self):
  """Checks weighted accuracy over two updates with no masked tokens."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy()

  first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
  first_scores = [
      # A batch with 100% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      # A batch with 50% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  # A weight for each `y_true` scalar, given as one flat list.
  first_weights = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
  accuracy.update_state(
      y_true=first_labels, y_pred=first_scores, sample_weight=first_weights)
  self.assertAllClose(self.evaluate(accuracy.result()), (6 + 4) / 12.0)

  second_labels = [[0, 4, 1, 2]]
  second_scores = [
      # The first two predictions match their labels (weight 1 each); the
      # last two (weight 2 each) do not.
      [
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
  ]
  second_weights = [1.0, 1.0, 2.0, 2.0]
  accuracy.update_state(
      y_true=second_labels,
      y_pred=second_scores,
      sample_weight=second_weights)
  self.assertAllClose(self.evaluate(accuracy.result()), (6 + 4 + 2) / 18.0)
def test_weighted_update_state_with_masked_token(self):
  """Checks weighted accuracy over two updates when token 4 is masked."""
  accuracy = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])

  first_labels = [[1, 2, 3, 4], [0, 0, 0, 0]]
  first_scores = [
      # A batch with 100% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.9, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
      # A batch with 50% accuracy.
      [
          [0.1, 0.9, 0.1, 0.1, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.9, 0.1, 0.1, 0.1, 0.0],
      ],
  ]
  # A weight for each `y_true` scalar.
  first_weights = [[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]]
  accuracy.update_state(
      y_true=first_labels, y_pred=first_scores, sample_weight=first_weights)
  self.assertAllClose(self.evaluate(accuracy.result()), (4 + 4) / 10.0)

  second_labels = [[0, 4, 1, 2]]
  second_scores = [
      # The label 4 is masked. Of the remaining three, only the first
      # prediction (weight 1) matches; the other two carry weight 2 each.
      [
          [0.9, 0.1, 0.1, 0.1, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
          [0.1, 0.1, 0.1, 0.9, 0.1],
          [0.1, 0.1, 0.1, 0.1, 0.9],
      ],
  ]
  second_weights = [1.0, 1.0, 2.0, 2.0]
  accuracy.update_state(
      y_true=second_labels,
      y_pred=second_scores,
      sample_weight=second_weights)
  self.assertAllClose(self.evaluate(accuracy.result()), (4 + 4 + 1) / 15.0)
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    vocab_size: Optional[int] = 10000,
                    num_oov_buckets: Optional[int] = 1,
                    sequence_length: Optional[int] = 20,
                    num_validation_examples: Optional[int] = 10000,
                    embedding_size: Optional[int] = 96,
                    latent_size: Optional[int] = 670,
                    num_layers: Optional[int] = 1,
                    shared_embedding: Optional[bool] = False,
                    max_batches: Optional[int] = None,
                    cache_dir: Optional[str] = None):
  """Trains an RNN on the Stack Overflow next word prediction task.

  The function loads the centralized train/validation/test datasets,
  optionally truncates them, builds and compiles the recurrent model with
  three masked-accuracy metrics, and hands everything to
  `centralized_training_loop.run`.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    vocab_size: Integer dictating the number of most frequent words to use in
      the vocabulary.
    num_oov_buckets: The number of out-of-vocabulary buckets to use.
    sequence_length: The maximum number of words to take for each sequence.
    num_validation_examples: The number of test examples to use for
      validation.
    embedding_size: The dimension of the word embedding layer.
    latent_size: The dimension of the latent units in the recurrent layers.
    num_layers: The number of stacked recurrent layers to use.
    shared_embedding: Boolean indicating whether to tie input and output
      embeddings.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
    cache_dir: Forwarded unchanged to
      `stackoverflow_word_prediction.get_centralized_datasets`; presumably the
      directory used to cache the dataset (confirm against that function).
  """
  datasets = stackoverflow_word_prediction.get_centralized_datasets(
      vocab_size=vocab_size,
      max_sequence_length=sequence_length,
      train_batch_size=batch_size,
      num_validation_examples=num_validation_examples,
      num_oov_buckets=num_oov_buckets,
      cache_dir=cache_dir)
  train_dataset, validation_dataset, test_dataset = datasets

  # Only a positive `max_batches` truncates; None, zero and negatives do not.
  cap_datasets = bool(max_batches) and max_batches >= 1
  if cap_datasets:
    train_dataset = train_dataset.take(max_batches)
    validation_dataset = validation_dataset.take(max_batches)
    test_dataset = test_dataset.take(max_batches)

  model = stackoverflow_models.create_recurrent_model(
      vocab_size=vocab_size,
      num_oov_buckets=num_oov_buckets,
      name='stackoverflow-lstm',
      embedding_size=embedding_size,
      latent_size=latent_size,
      num_layers=num_layers,
      shared_embedding=shared_embedding)

  special_tokens = stackoverflow_word_prediction.get_special_tokens(
      vocab_size=vocab_size, num_oov_buckets=num_oov_buckets)
  pad_token = special_tokens.pad
  oov_tokens = special_tokens.oov
  eos_token = special_tokens.eos

  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  # Each accuracy variant masks a progressively larger set of special tokens.
  accuracy_metrics = [
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_with_oov', masked_tokens=[pad_token]),
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens),
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov_or_eos',
          masked_tokens=[pad_token, eos_token] + oov_tokens),
  ]
  model.compile(loss=loss_fn, optimizer=optimizer, metrics=accuracy_metrics)

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=train_dataset,
      validation_dataset=validation_dataset,
      test_dataset=test_dataset,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      num_epochs=num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=decay_epochs,
      lr_decay=lr_decay)
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    sequence_length: Optional[int] = 80,
                    max_batches: Optional[int] = None,
                    cache_dir: Optional[str] = None):
  """Trains a two-layer RNN on Shakespeare next-character-prediction.

  The function loads the centralized train/test datasets, optionally
  truncates them, builds and compiles the recurrent model with a
  padding-masked accuracy metric, and hands everything to
  `centralized_training_loop.run`. The test split is passed to the training
  loop as its validation dataset.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    sequence_length: The sequence length used for Shakespeare preprocessing.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
    cache_dir: Forwarded unchanged to
      `shakespeare_dataset.get_centralized_datasets`; presumably the directory
      used to cache the dataset (confirm against that function).
  """
  datasets = shakespeare_dataset.get_centralized_datasets(
      train_batch_size=batch_size,
      sequence_length=sequence_length,
      cache_dir=cache_dir)
  shakespeare_train, shakespeare_test = datasets

  # Only a positive `max_batches` truncates; None, zero and negatives do not.
  cap_datasets = bool(max_batches) and max_batches >= 1
  if cap_datasets:
    shakespeare_train = shakespeare_train.take(max_batches)
    shakespeare_test = shakespeare_test.take(max_batches)

  # Only the first of the four special tokens (padding) is needed here.
  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  model = shakespeare_models.create_recurrent_model(
      vocab_size=VOCAB_SIZE, sequence_length=sequence_length)

  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  accuracy_metrics = [
      keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token])
  ]
  model.compile(optimizer=optimizer, loss=loss_fn, metrics=accuracy_metrics)

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=shakespeare_train,
      validation_dataset=shakespeare_test,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      num_epochs=num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=decay_epochs,
      lr_decay=lr_decay)