def test_run_simple_model(self):
  vocab_size = 6
  mask_model = shakespeare_models.create_recurrent_model(
      vocab_size, sequence_length=5)
  mask_model.compile(
      optimizer='sgd',
      loss='sparse_categorical_crossentropy',
      metrics=[keras_metrics.MaskedCategoricalAccuracy()])
  no_mask_model = shakespeare_models.create_recurrent_model(
      vocab_size, sequence_length=5, mask_zero=False)
  no_mask_model.compile(
      optimizer='sgd',
      loss='sparse_categorical_crossentropy',
      metrics=[keras_metrics.MaskedCategoricalAccuracy()])

  constant_test_weights = tf.nest.map_structure(tf.ones_like,
                                                mask_model.weights)
  mask_model.set_weights(constant_test_weights)
  no_mask_model.set_weights(constant_test_weights)

  # `tf.data.Dataset.from_tensor_slices` aggressively coalesces the input
  # into a single tensor, but we want a tuple of two tensors per example, so
  # we apply a transformation to split.
  def split_to_tuple(t):
    return (t[0, :], t[1, :])

  data = tf.data.Dataset.from_tensor_slices([
      ([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]),
      ([2, 3, 4, 0, 1], [3, 4, 0, 1, 2]),
  ]).map(split_to_tuple).batch(2)

  mask_metrics = mask_model.evaluate(data)
  no_mask_metrics = no_mask_model.evaluate(data)

  self.assertNotAllClose(mask_metrics, no_mask_metrics, atol=1e-3)
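# A minimal sketch (not part of the test above) of the mechanism it
# exercises: with `mask_zero=True`, `tf.keras.layers.Embedding` computes a
# boolean mask that Keras propagates to downstream losses and metrics, so
# padding positions (token 0) drop out of the evaluation. The layer sizes
# here are illustrative assumptions.
embedding = tf.keras.layers.Embedding(input_dim=6, output_dim=8, mask_zero=True)
tokens = tf.constant([[0, 1, 2, 3, 4]])
print(embedding.compute_mask(tokens))
# tf.Tensor([[False  True  True  True  True]], ...) -- position 0 is masked.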
def metrics_fn():
  return [
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_with_oov', masked_tokens=[pad_token]),
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens),
      # Notice that BOS never appears in the ground truth, so only EOS needs
      # masking here in addition to the tokens above.
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov_or_eos',
          masked_tokens=[pad_token, eos_token] + oov_tokens),
      keras_metrics.NumBatchesCounter(),
      keras_metrics.NumTokensCounter(masked_tokens=[pad_token]),
  ]
def test_update_state_with_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 5 / 7.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 33% accuracy on the unmasked tokens.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 6 / 10.0)
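# A hand check (standalone sketch, not part of the test suite) of the 5/7
# figure above: take the argmax of each prediction row, drop positions whose
# label is the masked token 4, and average the matches over what remains.
y_true = tf.constant([[1, 2, 3, 4], [0, 0, 0, 0]])
y_pred_ids = tf.constant([[1, 2, 3, 4], [1, 0, 3, 0]])  # argmax of the rows above
keep = tf.cast(tf.not_equal(y_true, 4), tf.float32)
hits = tf.cast(tf.equal(y_true, y_pred_ids), tf.float32) * keep
print((tf.reduce_sum(hits) / tf.reduce_sum(keep)).numpy())  # 0.714... == 5/7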
def _get_stackoverflow_metrics(vocab_size, num_oov_buckets):
  """Metrics for the Stack Overflow dataset."""
  special_tokens = stackoverflow_dataset.get_special_tokens(
      vocab_size, num_oov_buckets)
  pad_token = special_tokens.pad
  oov_tokens = special_tokens.oov
  eos_token = special_tokens.eos
  return [
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_with_oov', masked_tokens=[pad_token]),
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens),
      keras_metrics.MaskedCategoricalAccuracy(
          name='accuracy_no_oov_or_eos',
          masked_tokens=[pad_token, eos_token] + oov_tokens),
  ]
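# A minimal usage sketch for the helper above. `build_model()` is a
# hypothetical stand-in for any uncompiled Keras next-word-prediction model;
# the metric list plugs straight into `Model.compile`.
model = build_model()  # hypothetical
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=_get_stackoverflow_metrics(vocab_size=10000, num_oov_buckets=1))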
def eval_metrics_builder():
  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  return [
      tf.keras.metrics.SparseCategoricalCrossentropy(),
      keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token]),
  ]
def test_update_state_with_no_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertEqual(self.evaluate(metric.result()), 6 / 8.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 50% accuracy.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 8 / 12.0)
def test_constructor_no_masked_token(self):
  metric_name = 'my_test_metric'
  metric = keras_metrics.MaskedCategoricalAccuracy(name=metric_name)
  self.assertIsInstance(metric, tf.keras.metrics.Metric)
  self.assertEqual(metric.name, metric_name)
  self.assertAllEqual(metric.get_config()['masked_tokens'], [])
  self.assertEqual(self.evaluate(metric.result()), 0.0)
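# A rough sketch, under assumptions, of what the metric under test plausibly
# does: subclass `tf.keras.metrics.SparseCategoricalAccuracy` and zero out
# the sample weight wherever the label is a masked token. This illustrates
# the masking semantics the tests exercise; it is not the library's
# implementation (and omits `get_config`, which the real class provides).
class SketchedMaskedCategoricalAccuracy(
    tf.keras.metrics.SparseCategoricalAccuracy):

  def __init__(self, masked_tokens=(), name='accuracy', **kwargs):
    super().__init__(name=name, **kwargs)
    self._masked_tokens = list(masked_tokens)

  def update_state(self, y_true, y_pred, sample_weight=None):
    y_true = tf.reshape(tf.cast(y_true, tf.int64), [-1])
    y_pred = tf.reshape(y_pred, [-1, tf.shape(y_pred)[-1]])
    if sample_weight is None:
      sample_weight = tf.ones_like(y_true, dtype=tf.float32)
    else:
      sample_weight = tf.reshape(tf.cast(sample_weight, tf.float32), [-1])
    for token in self._masked_tokens:
      # Positions whose label equals a masked token contribute zero weight.
      sample_weight *= tf.cast(tf.not_equal(y_true, token), tf.float32)
    return super().update_state(y_true, y_pred, sample_weight)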
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    sequence_length: Optional[int] = 80,
                    max_batches: Optional[int] = None):
  """Trains a two-layer RNN on Shakespeare next-character prediction.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    sequence_length: The sequence length used for Shakespeare preprocessing.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
  """
  shakespeare_train, shakespeare_test = (
      shakespeare_dataset.get_centralized_datasets(
          train_batch_size=batch_size, sequence_length=sequence_length))

  if max_batches and max_batches >= 1:
    shakespeare_train = shakespeare_train.take(max_batches)
    shakespeare_test = shakespeare_test.take(max_batches)

  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()
  model = shakespeare_models.create_recurrent_model(
      vocab_size=VOCAB_SIZE, sequence_length=sequence_length)
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token])
      ])

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=shakespeare_train,
      validation_dataset=shakespeare_test,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      num_epochs=num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=decay_epochs,
      lr_decay=lr_decay)
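# A sketch, under assumptions, of the schedule that `decay_epochs` and
# `lr_decay` describe: presumably the training loop scales the learning rate
# by `lr_decay` once every `decay_epochs` epochs, which a Keras callback can
# express directly. `decayed_lr` is a hypothetical helper, not part of
# `centralized_training_loop`.
def decayed_lr(epoch, lr, decay_epochs=10, lr_decay=0.1):
  if decay_epochs is not None and epoch > 0 and epoch % decay_epochs == 0:
    return lr * lr_decay
  return lr

lr_callback = tf.keras.callbacks.LearningRateScheduler(decayed_lr, verbose=1)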
def metrics_builder():
  """Returns a `list` of `tf.keras.metrics.Metric` objects."""
  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  return [
      keras_metrics.NumBatchesCounter(),
      keras_metrics.NumExamplesCounter(),
      keras_metrics.NumTokensCounter(masked_tokens=[pad_token]),
      keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token]),
  ]
def test_weighted_update_state_with_scalar_weight(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4]],
      y_pred=[
          # A batch with 50% accuracy.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=1.0)
  self.assertAllClose(self.evaluate(metric.result()), 0.5)
def test_update_state_with_all_tokens_masked(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[1, 2, 3, 4])
  metric.update_state(
      # Every label is a masked token, so no position counts toward accuracy.
      y_true=[[1, 2, 3, 4], [4, 3, 2, 1]],
      y_pred=[
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 0.0)
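# Why the all-masked case reports 0.0 rather than NaN: with every position
# masked, both the weighted hit count and the weighted total are zero, and a
# `divide_no_nan`-style division (presumably what the metric's `result()`
# reduction uses) maps 0/0 to 0. A one-line check of that behavior:
print(tf.math.divide_no_nan(0.0, 0.0).numpy())  # 0.0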
def test_update_state_with_multiple_tokens_masked(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[1, 2, 3, 4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # Every label in this example is masked.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 0.5)
def test_weighted_update_state_with_masked_token(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
  self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 33% accuracy on the unmasked tokens.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=[1.0, 1.0, 2.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (4 + 4 + 1) / 15.0)
def test_weighted_update_state_no_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (6 + 4) / 12.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 50% accuracy.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=[1.0, 1.0, 2.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (6 + 4 + 2) / 18.0)
def test_weighted_update_state_special_character_rank_2_sample_weight(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
  self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)
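# Hand accounting for the (4 + 4) / 10 assertion above (a standalone sketch):
# in the first example the three unmasked hits carry weights 1 + 2 + 1 = 4;
# in the second, the hits at positions 2 and 4 carry 2 + 2 = 4; the unmasked
# weight totals 4 + 6 = 10.
weights = tf.constant([[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
hits = tf.constant([[1.0, 1.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0]])  # unmasked hits
keep = tf.constant([[1.0, 1.0, 1.0, 0.0], [1.0, 1.0, 1.0, 1.0]])  # mask on label 4
print((tf.reduce_sum(hits * weights) / tf.reduce_sum(keep * weights)).numpy())  # 0.8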
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    vocab_size: int = 10000,
                    num_oov_buckets: int = 1,
                    d_embed: int = 96,
                    d_model: int = 512,
                    d_hidden: int = 2048,
                    num_heads: int = 8,
                    num_layers: int = 1,
                    max_position_encoding: int = 1000,
                    dropout: float = 0.1,
                    num_validation_examples: int = 10000,
                    sequence_length: int = 20,
                    experiment_name: str = 'centralized_stackoverflow',
                    root_output_dir: str = '/tmp/fedopt_guide',
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    max_batches: Optional[int] = None):
  """Trains a Transformer on the Stack Overflow next-word prediction task.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    vocab_size: Vocab size for normal tokens.
    num_oov_buckets: Number of out-of-vocabulary buckets.
    d_embed: Dimension of the token embeddings.
    d_model: Dimension of the features of the MultiHeadAttention layers.
    d_hidden: Dimension of the hidden layers of the FFN.
    num_heads: Number of attention heads.
    num_layers: Number of Transformer blocks.
    max_position_encoding: Maximum number of positions for position
      embeddings.
    dropout: Dropout rate.
    num_validation_examples: The number of test examples to use for
      validation.
    sequence_length: The maximum number of words to take for each sequence.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
""" train_dataset, validation_dataset, test_dataset = stackoverflow_word_prediction.get_centralized_datasets( vocab_size, sequence_length, train_batch_size=batch_size, num_validation_examples=num_validation_examples, num_oov_buckets=num_oov_buckets, ) if max_batches and max_batches >= 1: train_dataset = train_dataset.take(max_batches) validation_dataset = validation_dataset.take(max_batches) test_dataset = test_dataset.take(max_batches) model = transformer_models.create_transformer_lm( vocab_size=vocab_size, num_oov_buckets=num_oov_buckets, d_embed=d_embed, d_model=d_model, d_hidden=d_hidden, num_heads=num_heads, num_layers=num_layers, max_position_encoding=max_position_encoding, dropout=dropout, name='stackoverflow-transformer') special_tokens = stackoverflow_word_prediction.get_special_tokens( vocab_size=vocab_size, num_oov_buckets=num_oov_buckets) pad_token = special_tokens.pad oov_tokens = special_tokens.oov eos_token = special_tokens.eos model.compile( loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[ keras_metrics.MaskedCategoricalAccuracy(name='accuracy_with_oov', masked_tokens=[pad_token]), keras_metrics.MaskedCategoricalAccuracy(name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens), keras_metrics.MaskedCategoricalAccuracy( name='accuracy_no_oov_or_eos', masked_tokens=[pad_token, eos_token] + oov_tokens), ]) centralized_training_loop.run(keras_model=model, train_dataset=train_dataset, validation_dataset=validation_dataset, test_dataset=test_dataset, experiment_name=experiment_name, root_output_dir=root_output_dir, num_epochs=num_epochs, hparams_dict=hparams_dict, decay_epochs=decay_epochs, lr_decay=lr_decay)
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    vocab_size: Optional[int] = 10000,
                    num_oov_buckets: Optional[int] = 1,
                    sequence_length: Optional[int] = 20,
                    num_validation_examples: Optional[int] = 10000,
                    embedding_size: Optional[int] = 96,
                    latent_size: Optional[int] = 670,
                    num_layers: Optional[int] = 1,
                    shared_embedding: Optional[bool] = False,
                    max_batches: Optional[int] = None):
  """Trains an RNN on the Stack Overflow next-word prediction task.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    vocab_size: Integer dictating the number of most frequent words to use in
      the vocabulary.
    num_oov_buckets: The number of out-of-vocabulary buckets to use.
    sequence_length: The maximum number of words to take for each sequence.
    num_validation_examples: The number of test examples to use for
      validation.
    embedding_size: The dimension of the word embedding layer.
    latent_size: The dimension of the latent units in the recurrent layers.
    num_layers: The number of stacked recurrent layers to use.
    shared_embedding: Boolean indicating whether to tie the input and output
      embeddings.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
""" train_dataset, validation_dataset, test_dataset = stackoverflow_word_prediction.get_centralized_datasets( vocab_size=vocab_size, max_sequence_length=sequence_length, train_batch_size=batch_size, num_validation_examples=num_validation_examples, num_oov_buckets=num_oov_buckets, ) if max_batches and max_batches >= 1: train_dataset = train_dataset.take(max_batches) validation_dataset = validation_dataset.take(max_batches) test_dataset = test_dataset.take(max_batches) model = stackoverflow_models.create_recurrent_model( vocab_size=vocab_size, num_oov_buckets=num_oov_buckets, name='stackoverflow-lstm', embedding_size=embedding_size, latent_size=latent_size, num_layers=num_layers, shared_embedding=shared_embedding) special_tokens = stackoverflow_word_prediction.get_special_tokens( vocab_size=vocab_size, num_oov_buckets=num_oov_buckets) pad_token = special_tokens.pad oov_tokens = special_tokens.oov eos_token = special_tokens.eos model.compile( loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=[ keras_metrics.MaskedCategoricalAccuracy(name='accuracy_with_oov', masked_tokens=[pad_token]), keras_metrics.MaskedCategoricalAccuracy(name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens), keras_metrics.MaskedCategoricalAccuracy( name='accuracy_no_oov_or_eos', masked_tokens=[pad_token, eos_token] + oov_tokens), ]) centralized_training_loop.run(keras_model=model, train_dataset=train_dataset, validation_dataset=validation_dataset, test_dataset=test_dataset, experiment_name=experiment_name, root_output_dir=root_output_dir, num_epochs=num_epochs, hparams_dict=hparams_dict, decay_epochs=decay_epochs, lr_decay=lr_decay)