def built_model(self): bert_config = modeling.BertConfig.from_json_file( self.__bert_config_path) model = modeling.BertModel(config=bert_config, is_training=self.__is_training, input_ids=self.input_ids, input_mask=self.input_masks, token_type_ids=self.segment_ids, use_one_hot_embeddings=False) output_layer = model.get_pooled_output() hidden_size = output_layer.shape[-1].value if self.__is_training: # I.e., 0.1 dropout output_layer = tf.nn.dropout(output_layer, keep_prob=0.9) with tf.name_scope("output"): output_weights = tf.get_variable( "output_weights", [self.__num_classes, hidden_size], initializer=tf.truncated_normal_initializer(stddev=0.02)) output_bias = tf.get_variable("output_bias", [self.__num_classes], initializer=tf.zeros_initializer()) logits = tf.matmul(output_layer, output_weights, transpose_b=True) logits = tf.nn.bias_add(logits, output_bias) if self.__num_classes == 1: self.predictions = tf.cast(tf.greater_equal(logits, 0.0), dtype=tf.int32, name="predictions") else: self.predictions = tf.argmax(logits, axis=-1, name="predictions") if self.__is_training: with tf.name_scope("loss"): if self.__num_classes == 1: losses = tf.nn.sigmoid_cross_entropy_with_logits( logits=tf.reshape(logits, [-1]), labels=tf.cast(self.label_ids, dtype=tf.float32)) else: losses = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=self.label_ids) self.loss = tf.reduce_mean(losses, name="loss") with tf.name_scope('train_op'): self.train_op = optimization.create_optimizer( self.loss, self.__learning_rate, self.__num_train_step, self.__num_warmup_step, use_tpu=False)
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model(is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, albert_hub_module_handle) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(label_ids, predictions) loss = tf.metrics.mean(per_example_loss) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits]) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics) elif mode == tf.estimator.ModeKeys.PREDICT: output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}) else: raise ValueError( "Only TRAIN, EVAL and PREDICT modes are supported: %s" % (mode)) return output_spec
def inference(self): output_layer = self.model.get_pooled_output() logging.info(output_layer) with tf.variable_scope("loss"): def apply_dropout_last_layer(output_layer): output_layer = tf.nn.dropout(output_layer, keep_prob=0.9) return output_layer def not_apply_dropout(output_layer): return output_layer output_layer = tf.cond( self.is_training, lambda: apply_dropout_last_layer(output_layer), lambda: not_apply_dropout(output_layer)) match_1 = tf.strided_slice(output_layer, [0], [train_batch_size], [2]) match_2 = tf.strided_slice(output_layer, [1], [train_batch_size], [2]) match = tf.concat([match_1, match_2], 1) self.logits = tf.layers.dense(match, self.num_labels, name='fc') #print(self.logits) #self.logits = tf.layers.flatten(self.logits) #self.logits = tf.argmax(self.logits,axis=-1) logging.info(self.logits) self.r_labels = tf.strided_slice(self.labels, [0], [train_batch_size], [2]) logging.info(self.r_labels) self.r_labels = tf.expand_dims(self.r_labels, -1) logging.info(self.r_labels) self.loss = tf.losses.mean_squared_error(self.logits, self.r_labels) self.optim = optimization.create_optimizer(self.loss, learning_rate, num_train_steps, num_warmup_steps, False)
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, probabilities, logits, predictions) = \ create_model(albert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings, max_seq_length, dropout_prob, hub_module) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy( labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean( values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits, is_real_example]) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities, "predictions": predictions}, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, probabilities, logits, predictions) = \ create_model(albert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings, task_name, hub_module) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer) output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: if task_name not in ["sts-b", "cola"]: def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, } elif task_name == "sts-b": def metric_fn(per_example_loss, label_ids, logits, is_real_example): """Compute Pearson correlations for STS-B.""" # Display labels and predictions concat1 = contrib_metrics.streaming_concat(logits) concat2 = contrib_metrics.streaming_concat(label_ids) # Compute Pearson correlation pearson = contrib_metrics.streaming_pearson_correlation( logits, label_ids, weights=is_real_example) # Compute MSE # mse = tf.metrics.mean(per_example_loss) mse = tf.metrics.mean_squared_error( label_ids, logits, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "pred": concat1, "label_ids": concat2, "pearson": pearson, "MSE": mse, "eval_loss": loss, } elif task_name == "cola": def metric_fn(per_example_loss, label_ids, logits, is_real_example): """Compute Matthew's correlations for STS-B.""" predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient tp, tp_op = tf.metrics.true_positives( predictions, label_ids, weights=is_real_example) tn, tn_op = tf.metrics.true_negatives( predictions, label_ids, weights=is_real_example) fp, fp_op = tf.metrics.false_positives( predictions, label_ids, weights=is_real_example) fn, fn_op = tf.metrics.false_negatives( predictions, label_ids, weights=is_real_example) # Compute Matthew's correlation mcc = tf.div_no_nan( tp * tn - fp * fn, tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn), 0.5)) # Compute accuracy accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "matthew_corr": (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)), "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [ per_example_loss, label_ids, logits, is_real_example ]) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, predictions={ "probabilities": probabilities, "predictions": predictions }, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] # Note: We keep this feature name `next_sentence_labels` to be compatible # with the original data created by lanzhzh@. However, in the ALBERT case # it does represent sentence_order_labels. sentence_order_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.AlbertModel( config=albert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( albert_config, model.get_sequence_output(), model.get_embedding_table(), masked_lm_positions, masked_lm_ids, masked_lm_weights) (sentence_order_loss, sentence_order_example_loss, sentence_order_log_probs) = get_sentence_order_output( albert_config, model.get_pooled_output(), sentence_order_labels) total_loss = masked_lm_loss + sentence_order_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: tf.logging.info("number of hidden group %d to initialize", albert_config.num_hidden_groups) num_of_initialize_group = 1 if FLAGS.init_from_group0: num_of_initialize_group = albert_config.num_hidden_groups if albert_config.net_structure_type > 0: num_of_initialize_group = albert_config.num_hidden_layers (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint, num_of_initialize_group) if use_tpu: def tpu_scaffold(): for gid in range(num_of_initialize_group): tf.logging.info("initialize the %dth layer", gid) tf.logging.info(assignment_map[gid]) tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid]) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: for gid in range(num_of_initialize_group): tf.logging.info("initialize the %dth layer", gid) tf.logging.info(assignment_map[gid]) tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid]) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer, poly_power, start_warmup_step) output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(*args): """Computes the loss and accuracy of the model.""" (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels) = args[:7] masked_lm_log_probs = tf.reshape( masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) metrics = { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss, } sentence_order_log_probs = tf.reshape( sentence_order_log_probs, [-1, sentence_order_log_probs.shape[-1]]) sentence_order_predictions = tf.argmax( sentence_order_log_probs, axis=-1, output_type=tf.int32) sentence_order_labels = tf.reshape(sentence_order_labels, [-1]) sentence_order_accuracy = tf.metrics.accuracy( labels=sentence_order_labels, predictions=sentence_order_predictions) sentence_order_mean_loss = tf.metrics.mean( values=sentence_order_example_loss) metrics.update({ "sentence_order_accuracy": sentence_order_accuracy, "sentence_order_loss": sentence_order_mean_loss }) return metrics metric_values = [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels ] eval_metrics = (metric_fn, metric_values) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] input_cdc_ids = features["input_cdc_ids"] age = features["age"] sex_ids = features["sex_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, probabilities, predictions) = \ create_model(albert_config, is_training, input_ids, input_mask, segment_ids, input_cdc_ids, age, sex_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() pprint(tvars) initialized_variable_names = {} scaffold_fn = None if init_checkpoint or FLAGS.cdc_init_checkpoint: if init_checkpoint: print('Loading ALBERT model') (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) pprint(assignment_map) if FLAGS.cdc_init_checkpoint: print('Loading CDC model') cdc_map = {} for var in tvars: if var.name.startswith('cdc/'): name = str.replace(var.name, ':0', '') cdc_map[name] = name initialized_variable_names[var.name] = 1 pprint(cdc_map) if use_tpu: def tpu_scaffold(): if init_checkpoint: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) if FLAGS.cdc_init_checkpoint: tf.train.init_from_checkpoint( FLAGS.cdc_init_checkpoint, cdc_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: if init_checkpoint: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) if FLAGS.cdc_init_checkpoint: tf.train.init_from_checkpoint(FLAGS.cdc_init_checkpoint, cdc_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, predictions, is_real_example): accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) mean_per_class_accuracy = tf.metrics.mean_per_class_accuracy( labels=label_ids, predictions=predictions, num_classes=num_labels, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, "eval_mean_per_class_accuracy": mean_per_class_accuracy, } eval_metrics = (metric_fn, [ per_example_loss, label_ids, predictions, is_real_example ]) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, predictions={ "probabilities": probabilities, "predictions": predictions }, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] token_label_ids = features["token_label_ids"] predicate_matrix_ids = features["predicate_matrix_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(token_label_ids), dtype=tf.float32) # TO DO is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, predicate_head_select_loss, predicate_head_probabilities, predicate_head_predictions, token_label_loss, token_label_per_example_loss, token_label_logits, token_label_predictions) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, token_label_ids, predicate_matrix_ids, num_token_labels, num_predicate_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(predicate_head_select_loss, token_label_per_example_loss, token_label_ids, token_label_logits, is_real_example): token_label_predictions = tf.argmax(token_label_logits, axis=-1, output_type=tf.int32) token_label_pos_indices_list = list( range(num_token_labels) )[4:] # ["[Padding]","[##WordPiece]", "[CLS]", "[SEP]"] + seq_out_set pos_indices_list = token_label_pos_indices_list[: -1] # do not care "O" token_label_precision_macro = tf_metrics.precision( token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="macro") token_label_recall_macro = tf_metrics.recall( token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="macro") token_label_f_macro = tf_metrics.f1(token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="macro") token_label_precision_micro = tf_metrics.precision( token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="micro") token_label_recall_micro = tf_metrics.recall( token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="micro") token_label_f_micro = tf_metrics.f1(token_label_ids, token_label_predictions, num_token_labels, pos_indices_list, average="micro") token_label_loss = tf.metrics.mean( values=token_label_per_example_loss, weights=is_real_example) predicate_head_select_loss = tf.metrics.mean( values=predicate_head_select_loss) return { "predicate_head_select_loss": predicate_head_select_loss, "eval_token_label_precision(macro)": token_label_precision_macro, "eval_token_label_recall(macro)": token_label_recall_macro, "eval_token_label_f(macro)": token_label_f_macro, "eval_token_label_precision(micro)": token_label_precision_micro, "eval_token_label_recall(micro)": token_label_recall_micro, "eval_token_label_f(micro)": token_label_f_micro, "eval_token_label_loss": token_label_loss, } eval_metrics = (metric_fn, [ predicate_head_select_loss, token_label_per_example_loss, token_label_ids, token_label_logits, is_real_example ]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={ "predicate_head_probabilities": predicate_head_probabilities, "predicate_head_predictions": predicate_head_predictions, "token_label_predictions": token_label_predictions }, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, probabilities, logits, predictions) = create_model(albert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings, task_name, hub_module) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer) output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, is_real_example): #predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) # print("###metric_fn.logits:",logits.shape) # (?,80) # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) # print("###metric_fn.label_ids:",label_ids.shape,";predictions:",predictions.shape) # label_ids: (?,80);predictions:(?,) num_aspects = 10 # magic number logits_split = tf.split( logits, num_aspects, axis=-1) # a list. length is num_aspects label_ids_split = tf.split( logits, num_aspects, axis=-1) # a list. length is num_aspects accuracy = tf.constant(0.0, dtype=tf.float64) for j, logits in enumerate(logits_split): # # accuracy = tf.metrics.accuracy(label_ids, predictions) predictions = tf.argmax( logits, axis=-1, output_type=tf.int32) # should be [batch_size,] label_id_ = tf.cast(tf.argmax(label_ids_split[j], axis=-1), dtype=tf.int32) tf.logging.debug( "label_ids_split[j] = %s; predictions = %s; label_id_ = %s" % (label_ids_split[j], predictions, label_id_)) current_accuracy, update_op_accuracy = tf.metrics.accuracy( labels=label_id_, predictions=predictions, weights=is_real_example) accuracy += tf.cast(current_accuracy, dtype=tf.float64) accuracy = accuracy / tf.constant(num_aspects, dtype=tf.float64) loss = tf.metrics.mean(per_example_loss) return { "eval_accuracy": (accuracy, update_op_accuracy), "eval_loss": loss, } eval_metrics = (metric_fn, [ per_example_loss, label_ids, logits, is_real_example ]) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, predictions={ "probabilities": probabilities, "predictions": predictions }, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] # Note: We keep this feature name `next_sentence_labels` to be compatible # with the original data created by lanzhzh@. However, in the ALBERT case # it does represent sentence_order_labels. sentence_order_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) tags = set() if is_training: tags.add("train") albert_module = hub.Module(albert_hub_module_handle, tags=tags, trainable=True) tok = create_tokenizer_from_hub_module(albert_module) vocab_size = len(tok.vocab) albert_mlm_inputs = dict(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, mlm_positions=masked_lm_positions) albert_outputs = albert_module(albert_mlm_inputs, signature="mlm", as_dict=True) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( albert_outputs["sequence_output"], albert_outputs["mlm_logits"], vocab_size, masked_lm_positions, masked_lm_ids, masked_lm_weights) (sentence_order_loss, sentence_order_example_loss, sentence_order_log_probs) = get_sentence_order_output( albert_outputs["pooled_output"], sentence_order_labels) total_loss = masked_lm_loss + sentence_order_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer, poly_power, start_warmup_step) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(*args): """Computes the loss and accuracy of the model.""" (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels) = args[:7] masked_lm_log_probs = tf.reshape( masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) metrics = { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss, } sentence_order_log_probs = tf.reshape( sentence_order_log_probs, [-1, sentence_order_log_probs.shape[-1]]) sentence_order_predictions = tf.argmax( sentence_order_log_probs, axis=-1, output_type=tf.int32) sentence_order_labels = tf.reshape(sentence_order_labels, [-1]) sentence_order_accuracy = tf.metrics.accuracy( labels=sentence_order_labels, predictions=sentence_order_predictions) sentence_order_mean_loss = tf.metrics.mean( values=sentence_order_example_loss) metrics.update({ "sentence_order_accuracy": sentence_order_accuracy, "sentence_order_loss": sentence_order_mean_loss }) return metrics metric_values = [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels ] eval_metrics = (metric_fn, metric_values) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec