def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Runs a BERT encoder followed by a classifier head and packages the result
    as the `TPUEstimatorSpec` that matches `mode`. The configuration is read
    from `bert_config` in the enclosing scope.

    Args:
      features: The model input, or a dict carrying it under "input_ids" (and
        optionally the labels under "labels").
      labels: Classification labels, or None when they are supplied through
        `features["labels"]`.
      mode: A `tf.estimator.ModeKeys` value selecting train / eval / predict.
      params: Unused; required by the Estimator `model_fn` signature.

    Returns:
      A `tf.compat.v1.estimator.tpu.TPUEstimatorSpec` for the requested mode.
    """
    if isinstance(features, dict):
        # Compare against None explicitly: truth-testing a symbolic tensor
        # (`not labels`) raises while the graph is being built, so a caller
        # that passes real labels next to a feature dict would crash here.
        if labels is None and "labels" in features:
            labels = features["labels"]
        features = features["input_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(bert_config)
    headl = ClassifierLossLayer(
        bert_config["num_labels"], bert_config["hidden_dropout_prob"],
        utils.create_initializer(bert_config["initializer_range"]),
        name=bert_config["scope"]+"/classifier")

    _, pooled_output = model(features, training=is_training)
    total_loss, log_probs = headl(pooled_output, labels, is_training)

    tvars = tf.compat.v1.trainable_variables()
    utils.log_variables(tvars, bert_config["ckpt_var_list"])

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = optimization.get_linear_warmup_linear_decay_lr(
            init_lr=bert_config["learning_rate"],
            num_train_steps=bert_config["num_train_steps"],
            num_warmup_steps=bert_config["num_warmup_steps"])

        optimizer = optimization.get_optimizer(bert_config, learning_rate)

        global_step = tf.compat.v1.train.get_or_create_global_step()

        gradients = optimizer.compute_gradients(total_loss, tvars)
        train_op = optimizer.apply_gradients(
            gradients, global_step=global_step)

        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=utils.add_scalars_to_summary(
                bert_config["output_dir"], {"learning_rate": learning_rate}))

    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(loss_value, label_ids, log_probs):
            """Builds the evaluation metrics (loss, accuracy, P/R/F1 at 1)."""
            loss = tf.compat.v1.metrics.mean(values=loss_value)

            predictions = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
            accuracy = tf.compat.v1.metrics.accuracy(
                labels=label_ids, predictions=predictions)
            p1, p1_op = tf.compat.v1.metrics.precision_at_k(
                labels=tf.cast(label_ids, tf.int64), predictions=log_probs,
                k=1)
            r1, r1_op = tf.compat.v1.metrics.recall_at_k(
                labels=tf.cast(label_ids, tf.int64), predictions=log_probs,
                k=1)
            # F1 is derived from the precision/recall value tensors, so it
            # has no update op of its own; a no-op fills that slot.
            f11 = tf.math.divide_no_nan(2*p1*r1, p1+r1)

            metric_dict = {
                "P@1": (p1, p1_op),
                "R@1": (r1, r1_op),
                "f1@1": (f11, tf.no_op()),
                "classification_accuracy": accuracy,
                "classification_loss": loss,
            }

            return metric_dict

        # The scalar loss gets a leading dimension before being handed to
        # `metric_fn`.
        eval_metrics = (metric_fn,
                        [tf.expand_dims(total_loss, 0), labels, log_probs])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics)
    else:
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"log-probabilities": log_probs})

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Wires a BERT encoder to a masked-LM head and a next-sentence head and
    returns the `TPUEstimatorSpec` for the requested `mode`. The configuration
    is read from `bert_config` in the enclosing scope.

    Args:
      features: Mapping of input tensors. "input_ids" is required; the keys
        "segment_ids", "masked_lm_ids", "masked_lm_weights",
        "masked_lm_positions" and "next_sentence_labels" are looked up with
        `.get` while building the model and indexed directly in EVAL mode.
      labels: Unused; required by the Estimator `model_fn` signature.
      mode: A `tf.estimator.ModeKeys` value selecting train / eval / predict.
      params: Unused; required by the Estimator `model_fn` signature.

    Returns:
      A `tf.compat.v1.estimator.tpu.TPUEstimatorSpec` for the requested mode.
    """
    mode_keys = tf.estimator.ModeKeys
    make_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    is_training = (mode == mode_keys.TRAIN)

    # Encoder and the two pretraining heads.
    model = modeling.BertModel(bert_config)
    masked_lm = MaskedLMLayer(
        bert_config["hidden_size"],
        bert_config["vocab_size"],
        model.embeder,
        initializer=utils.create_initializer(
            bert_config["initializer_range"]),
        activation_fn=utils.get_activation(bert_config["hidden_act"]))
    next_sentence = NSPLayer(
        bert_config["hidden_size"],
        initializer=utils.create_initializer(
            bert_config["initializer_range"]))

    # Forward pass.
    sequence_output, pooled_output = model(
        features["input_ids"],
        training=is_training,
        token_type_ids=features.get("segment_ids"))

    masked_lm_loss, masked_lm_log_probs = masked_lm(
        sequence_output,
        label_ids=features.get("masked_lm_ids"),
        label_weights=features.get("masked_lm_weights"),
        masked_lm_positions=features.get("masked_lm_positions"))

    next_sentence_loss, next_sentence_log_probs = next_sentence(
        pooled_output, features.get("next_sentence_labels"))

    # The next-sentence loss only contributes when NSP is switched on.
    if bert_config["use_nsp"]:
        total_loss = masked_lm_loss + next_sentence_loss
    else:
        total_loss = masked_lm_loss

    trainable = tf.compat.v1.trainable_variables()
    utils.log_variables(trainable, bert_config["ckpt_var_list"])

    if mode == mode_keys.TRAIN:
        learning_rate = optimization.get_linear_warmup_linear_decay_lr(
            init_lr=bert_config["learning_rate"],
            num_train_steps=bert_config["num_train_steps"],
            num_warmup_steps=bert_config["num_warmup_steps"])
        optimizer = optimization.get_optimizer(bert_config, learning_rate)
        step = tf.compat.v1.train.get_global_step()

        grads_and_vars = optimizer.compute_gradients(total_loss, trainable)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=step)

        summary_call = utils.add_scalars_to_summary(
            bert_config["output_dir"], {"learning_rate": learning_rate})
        return make_spec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=summary_call)

    if mode == mode_keys.EVAL:

        def metric_fn(masked_lm_loss_value, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_loss_value, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            # Masked LM: weighted accuracy and mean loss.
            mlm_pred = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            mlm_accuracy = tf.compat.v1.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=mlm_pred,
                weights=masked_lm_weights)
            mlm_mean_loss = tf.compat.v1.metrics.mean(
                values=masked_lm_loss_value)

            # Next sentence: accuracy and mean loss.
            nsp_pred = tf.argmax(
                next_sentence_log_probs, axis=-1, output_type=tf.int32)
            nsp_accuracy = tf.compat.v1.metrics.accuracy(
                labels=next_sentence_labels, predictions=nsp_pred)
            nsp_mean_loss = tf.compat.v1.metrics.mean(
                values=next_sentence_loss_value)

            return {
                "masked_lm_accuracy": mlm_accuracy,
                "masked_lm_loss": mlm_mean_loss,
                "next_sentence_accuracy": nsp_accuracy,
                "next_sentence_loss": nsp_mean_loss,
            }

        metric_inputs = [
            masked_lm_loss,
            masked_lm_log_probs,
            features["masked_lm_ids"],
            features["masked_lm_weights"],
            next_sentence_loss,
            next_sentence_log_probs,
            features["next_sentence_labels"],
        ]
        return make_spec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs))

    # Any remaining mode is served with predictions only.
    return make_spec(
        mode=mode,
        predictions={
            "log-probabilities": masked_lm_log_probs,
            "seq-embeddings": sequence_output,
        })
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Runs the transformer model on the inputs, computes a padded cross-entropy
    loss against the targets and returns the `TPUEstimatorSpec` for `mode`.
    The configuration is read from `transformer_config` in the enclosing
    scope.

    Args:
      features: The model input, or a dict carrying it under "input_ids" (and
        optionally the targets under "target_ids").
      labels: Target ids, or None when they are supplied through
        `features["target_ids"]`.
      mode: A `tf.estimator.ModeKeys` value selecting train / eval / predict.
      params: Unused; required by the Estimator `model_fn` signature.

    Returns:
      A `tf.compat.v1.estimator.tpu.TPUEstimatorSpec` for the requested mode.
    """

    def load_tokenizer():
        """Builds the SentencePiece tokenizer from the configured model file."""
        # Read inside a `with` block so the file handle is closed right away
        # instead of being left to garbage collection.
        with tf.io.gfile.GFile(
                transformer_config["vocab_model_file"], "rb") as model_file:
            serialized_model = model_file.read()
        return tft.SentencepieceTokenizer(model=serialized_model)

    def ids_to_text(tokenizer, token_ids):
        """Detokenizes `token_ids` and applies the text post-processing."""
        text = tokenizer.detokenize(token_ids)
        # Add a space before special tokens.
        text = tf.strings.regex_replace(text, r"([<\[]\S+[>\]])", b" \\1")
        if transformer_config["substitute_newline"]:
            text = tf.strings.regex_replace(
                text, transformer_config["substitute_newline"], "\n")
        return text

    if isinstance(features, dict):
        # Compare against None explicitly: truth-testing a symbolic tensor
        # (`not labels`) raises while the graph is being built, so a caller
        # that passes real targets next to a feature dict would crash here.
        if labels is None and "target_ids" in features:
            labels = features["target_ids"]
        features = features["input_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.TransformerModel(transformer_config)
    (llh, logits, pred_ids), _ = model(features, target_ids=labels,
                                       training=is_training)

    total_loss = padded_cross_entropy_loss(
        logits, labels,
        transformer_config["label_smoothing"],
        transformer_config["vocab_size"])

    tvars = tf.compat.v1.trainable_variables()
    utils.log_variables(tvars, transformer_config["ckpt_var_list"])

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = optimization.get_linear_warmup_rsqrt_decay_lr(
            init_lr=transformer_config["learning_rate"],
            hidden_size=transformer_config["hidden_size"],
            num_warmup_steps=transformer_config["num_warmup_steps"])

        optimizer = optimization.get_optimizer(
            transformer_config, learning_rate)

        global_step = tf.compat.v1.train.get_global_step()

        if not transformer_config["use_bias"]:
            logging.info("Fixing position embedding, i.e. not trainable.")
            posemb = "pegasus/embeddings/position_embeddings"
            # Drop the position-embedding variable from the list handed to
            # the optimizer so that it receives no gradient updates.
            tvars = [v for v in tvars if v.name.split(":")[0] != posemb]

        gradients = optimizer.compute_gradients(total_loss, tvars)
        train_op = optimizer.apply_gradients(
            gradients, global_step=global_step)

        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=utils.add_scalars_to_summary(
                transformer_config["output_dir"],
                {"learning_rate": learning_rate}))

    elif mode == tf.estimator.ModeKeys.EVAL:

        tokenizer = load_tokenizer()

        def rouge_py_func(label_sent, pred_sent):
            """Approximate ROUGE scores, always run externally for final scores."""
            scorer = rouge_scorer.RougeScorer(
                ["rouge1", "rouge2", "rougeLsum"],
                use_stemmer=True)
            r1, r2, rl = 0.0, 0.0, 0.0
            for ls, ps in zip(label_sent, pred_sent):
                score = scorer.score(ls.decode("utf-8"), ps.decode("utf-8"))
                r1 += score["rouge1"].fmeasure
                r2 += score["rouge2"].fmeasure
                rl += score["rougeLsum"].fmeasure
            # Average the per-example F-measures over the batch.
            return r1 / len(label_sent), r2 / len(label_sent), rl / len(
                label_sent)

        def metric_fn(loss, log_probs, label_ids, pred_ids):
            """Builds the evaluation metrics (loss, likelihood, ROUGE)."""
            loss = tf.compat.v1.metrics.mean(values=loss)
            # Positions whose label id is 0 get zero weight in the mean.
            log_probs = tf.compat.v1.metrics.mean(
                values=log_probs,
                weights=tf.cast(tf.not_equal(label_ids, 0), tf.float32))
            metric_dict = {
                "prediction_loss": loss,
                "log_likelihood": log_probs,
            }

            if not transformer_config["use_tpu"]:
                # Approximate ROUGE scores if not running on tpus.
                # Always run externally for final scores.
                label_sent = ids_to_text(tokenizer, label_ids)
                pred_sent = ids_to_text(tokenizer, pred_ids)
                rouge_value = tf.compat.v1.py_func(
                    func=rouge_py_func,
                    inp=[label_sent, pred_sent],
                    Tout=[tf.float64, tf.float64, tf.float64],
                    stateful=False)
                rouge_value = tf.cast(rouge_value, tf.float32)
                rouge1 = tf.compat.v1.metrics.mean(values=rouge_value[0])
                rouge2 = tf.compat.v1.metrics.mean(values=rouge_value[1])
                rougeL = tf.compat.v1.metrics.mean(values=rouge_value[2])  # pylint: disable=invalid-name

                metric_dict.update({
                    "eval/Rouge-1": rouge1,
                    "eval/Rouge-2": rouge2,
                    "eval/Rouge-L": rougeL,
                })
            return metric_dict

        eval_metrics = (metric_fn,
                        [total_loss, llh, labels, pred_ids])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics)
    else:

        prediction_dict = {"pred_ids": pred_ids}
        if not transformer_config["use_tpu"]:
            # Decoded text is only added when not running on TPUs.
            tokenizer = load_tokenizer()
            pred_sent = ids_to_text(tokenizer, pred_ids)
            prediction_dict.update({"pred_sent": pred_sent})

        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=prediction_dict)

    return output_spec