def _create_model(params, is_train):
  """Creates transformer model."""
  # Hyper-parameters shared verbatim by the encoder and the decoder stacks.
  stack_kwargs = {
      "num_layers": params["num_hidden_layers"],
      "num_attention_heads": params["num_heads"],
      "intermediate_size": params["filter_size"],
      "activation": "relu",
      "dropout_rate": params["relu_dropout"],
      "attention_dropout_rate": params["attention_dropout"],
      "use_bias": False,
      "norm_first": True,
      "norm_epsilon": 1e-6,
      "intermediate_dropout": params["relu_dropout"],
  }
  # Constructor arguments for the Seq2SeqTransformer; the encoder is built
  # before the decoder, matching the original construction order.
  seq2seq_kwargs = {
      "vocab_size": params["vocab_size"],
      "embedding_width": params["hidden_size"],
      "dropout_rate": params["layer_postprocess_dropout"],
      "padded_decode": params["padded_decode"],
      "decode_max_length": params["decode_max_length"],
      "dtype": params["dtype"],
      "extra_decode_length": params["extra_decode_length"],
      "beam_size": params["beam_size"],
      "alpha": params["alpha"],
      "encoder_layer": models.TransformerEncoder(**stack_kwargs),
      "decoder_layer": models.TransformerDecoder(**stack_kwargs),
      "name": "transformer_v2",
  }

  if is_train:
    # Training graph: consumes both source and target token ids and adds the
    # label-smoothed transformer loss to the model.
    inputs = tf.keras.layers.Input((None,), dtype="int64", name="inputs")
    targets = tf.keras.layers.Input((None,), dtype="int64", name="targets")
    transformer = models.Seq2SeqTransformer(**seq2seq_kwargs)
    logits = transformer(
        dict(inputs=inputs, targets=targets), training=is_train)
    vocab_size = params["vocab_size"]
    label_smoothing = params["label_smoothing"]
    if params["enable_metrics_in_training"]:
      logits = metrics.MetricLayer(vocab_size)([logits, targets])
    # Identity layer pinned to float32 so the loss sees full-precision logits.
    logits = tf.keras.layers.Lambda(
        lambda x: x, name="logits", dtype=tf.float32)(logits)
    model = tf.keras.Model([inputs, targets], logits)
    model.add_loss(
        metrics.transformer_loss(logits, targets, label_smoothing, vocab_size))
    return model

  # Decode-only graph: a fixed batch size is required when padded decoding
  # is enabled, otherwise the batch dimension stays dynamic.
  batch_size = params["decode_batch_size"] if params["padded_decode"] else None
  inputs = tf.keras.layers.Input(
      (None,), batch_size=batch_size, dtype="int64", name="inputs")
  transformer = models.Seq2SeqTransformer(**seq2seq_kwargs)
  ret = transformer(dict(inputs=inputs), training=is_train)
  return tf.keras.Model(inputs, [ret["outputs"], ret["scores"]])
def create_model(params, is_train):
  """Creates transformer model."""
  with tf.name_scope("model"):
    if is_train:
      # Training graph: both source and target token ids are inputs, and the
      # label-smoothed transformer loss is attached to the model.
      inputs = tf.keras.layers.Input((None,), dtype="int64", name="inputs")
      targets = tf.keras.layers.Input((None,), dtype="int64", name="targets")
      transformer = Transformer(params, name="transformer_v2")
      logits = transformer([inputs, targets], training=is_train)
      vocab_size = params["vocab_size"]
      label_smoothing = params["label_smoothing"]
      if params["enable_metrics_in_training"]:
        logits = metrics.MetricLayer(vocab_size)([logits, targets])
      # Identity layer pinned to float32 so the loss is computed on
      # full-precision logits.
      logits = tf.keras.layers.Lambda(
          lambda x: x, name="logits", dtype=tf.float32)(logits)
      model = tf.keras.Model([inputs, targets], logits)
      model.add_loss(
          metrics.transformer_loss(logits, targets, label_smoothing,
                                   vocab_size))
      return model

    # Inference graph: only the source ids are fed; the model returns decoded
    # sequences and their scores.
    inputs = tf.keras.layers.Input((None,), dtype="int64", name="inputs")
    transformer = Transformer(params, name="transformer_v2")
    ret = transformer([inputs], training=is_train)
    return tf.keras.Model(inputs, [ret["outputs"], ret["scores"]])
def test_metric_layer(self):
  """Checks that MetricLayer returns logits with an unchanged shape."""
  vocab_size = 50
  logits = tf.keras.layers.Input((None, vocab_size),
                                 dtype="float32",
                                 name="logits")
  targets = tf.keras.layers.Input((None,), dtype="int64", name="targets")
  wrapped_logits = metrics.MetricLayer(vocab_size)([logits, targets])
  # Batch and sequence dimensions stay unknown; the last axis is the vocab.
  expected_shape = [None, None, vocab_size]
  self.assertEqual(wrapped_logits.shape.as_list(), expected_shape)
def continuous_eval(strategy,
                    params,
                    model_type,
                    eval_file_pattern=None,
                    batch_size=4,
                    eval_steps=None,
                    model_dir=None,
                    timeout=3000):
  """Continuously evaluate checkpoints on testing data.

  Polls `model_dir` for new checkpoints, restores each one into the model,
  runs the evaluation dataset through it, and writes metric summaries to
  `model_dir/summaries/eval`.

  Args:
    strategy: `tf.distribute.Strategy` used to build the model and run the
      replicated test step.
    params: hyper-parameter object; `vocab_size` and `pad_token_id` attributes
      are read here, and it is forwarded to the input pipeline and model.
    model_type: forwarded to `models.create_model` to select the architecture.
    eval_file_pattern: file pattern for the evaluation input pipeline.
    batch_size: batch size for the evaluation dataset.
    eval_steps: if set, at most this many batches are evaluated per
      checkpoint.
    model_dir: directory polled for checkpoints; also receives eval summaries.
    timeout: seconds `tf.train.checkpoints_iterator` waits for a new
      checkpoint before stopping the loop.

  Returns:
    Dict mapping metric names to float results from the last evaluated
    checkpoint.
  """
  test_dataset = input_pipeline.get_input_dataset(
      eval_file_pattern,
      batch_size=batch_size,
      params=params,
      is_training=False,
      strategy=strategy)

  # Model, metric layer, and step counter are created under the strategy
  # scope so their variables are placed/mirrored correctly.
  with strategy.scope():
    model = models.create_model(model_type, params)
    metric_layer = metrics_v2.MetricLayer(params.vocab_size)
    eval_summary_writer = tf.summary.create_file_writer(
        os.path.join(model_dir, "summaries/eval"))
    # Restored from each checkpoint below; ONLY_FIRST_REPLICA keeps the
    # counter consistent across replicas.
    global_step = tf.Variable(
        0,
        trainable=False,
        dtype=tf.int64,
        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
        shape=[])

  @tf.function
  def test_step(inputs):
    """Calculates evaluation metrics on distributed devices."""

    def _test_step_fn(inputs):
      """Replicated accuracy calculation."""
      targets = models.remove_sos_from_seq(inputs["target_ids"],
                                           params.pad_token_id)

      # Using ground truth sequences as targets to calculate logits for
      # accuracy and perplexity metrics.
      logits, _, _ = model(inputs, training=False, mode="train")
      metric_layer([logits, targets])

      # Get logits from top beam search results for bleu and rouge metrics.
      logits = model(inputs, training=False, mode="eval")
      return targets, logits

    outputs = strategy.run(_test_step_fn, args=(inputs,))
    # Unwrap per-replica values into plain per-replica tuples for host-side
    # (numpy) metric computation.
    return tf.nest.map_structure(strategy.experimental_local_results, outputs)

  # Each entry pairs a streaming Mean metric with the function that computes
  # the per-batch score from (logits, targets) numpy arrays.
  metrics_and_funcs = [
      (tf.keras.metrics.Mean("bleu", dtype=tf.float32), bleu_score),
      (tf.keras.metrics.Mean("rouge_2_fscore", dtype=tf.float32),
       rouge_2_fscore),
      (tf.keras.metrics.Mean("rouge_l_fscore", dtype=tf.float32),
       rouge_l_fscore),
  ]
  eval_results = {}
  for latest_checkpoint in tf.train.checkpoints_iterator(
      model_dir, timeout=timeout):
    # expect_partial: the checkpoint may contain objects (e.g. optimizer
    # slots) that this eval graph does not use.
    checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
    checkpoint.restore(latest_checkpoint).expect_partial()
    logging.info("Loaded checkpoint %s", latest_checkpoint)

    for i, inputs in enumerate(test_dataset):
      if eval_steps and i >= eval_steps:
        break
      outputs = test_step(inputs)
      for metric, func in metrics_and_funcs:
        # outputs is ([per-replica targets], [per-replica logits]); score
        # each replica's local batch and fold it into the streaming mean.
        for targets, logits in zip(outputs[0], outputs[1]):
          metric.update_state(func(logits.numpy(), targets.numpy()))

    with eval_summary_writer.as_default():
      step = global_step.numpy()
      for metric, _ in metrics_and_funcs:
        eval_results[metric.name] = metric.result().numpy().astype(float)
        tf.summary.scalar(
            metric.name, eval_results[metric.name], step=step)
      # Accuracy/perplexity-style metrics accumulated inside the tf.function
      # via metric_layer.
      for metric in metric_layer.metrics:
        eval_results[metric.name] = metric.result().numpy().astype(float)
        tf.summary.scalar(
            metric.name, eval_results[metric.name], step=step)
      logging.info("Step %d Metrics= %s", step, str(eval_results))
      eval_summary_writer.flush()

    # Resets metrics.
    for metric, _ in metrics_and_funcs:
      metric.reset_states()
    for metric in metric_layer.metrics:
      metric.reset_states()
  return eval_results