def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""

  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]
  if "is_real_example" in features:
    is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
  else:
    is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, probabilities, logits, predictions) = \
      create_model(albert_config, is_training, input_ids, input_mask,
                   segment_ids, label_ids, num_labels,
                   use_one_hot_embeddings, task_name, hub_module)

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps,
        use_tpu, optimizer)
    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:
    if task_name not in ["sts-b", "cola"]:

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions,
            weights=is_real_example)
        loss = tf.metrics.mean(
            values=per_example_loss, weights=is_real_example)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }
    elif task_name == "sts-b":

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        """Compute Pearson correlation for STS-B."""
        # Stream labels and predictions so they can be inspected after eval.
        concat1 = contrib_metrics.streaming_concat(logits)
        concat2 = contrib_metrics.streaming_concat(label_ids)

        # Pearson correlation between predicted and gold scores.
        pearson = contrib_metrics.streaming_pearson_correlation(
            logits, label_ids, weights=is_real_example)

        # Mean squared error.
        mse = tf.metrics.mean_squared_error(
            label_ids, logits, weights=is_real_example)

        loss = tf.metrics.mean(
            values=per_example_loss, weights=is_real_example)
        return {
            "pred": concat1,
            "label_ids": concat2,
            "pearson": pearson,
            "MSE": mse,
            "eval_loss": loss,
        }
    elif task_name == "cola":

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        """Compute Matthews correlation coefficient for CoLA."""
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

        # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
        tp, tp_op = tf.metrics.true_positives(
            predictions, label_ids, weights=is_real_example)
        tn, tn_op = tf.metrics.true_negatives(
            predictions, label_ids, weights=is_real_example)
        fp, fp_op = tf.metrics.false_positives(
            predictions, label_ids, weights=is_real_example)
        fn, fn_op = tf.metrics.false_negatives(
            predictions, label_ids, weights=is_real_example)

        # Matthews correlation; div_no_nan guards against a zero denominator
        # when any row or column of the confusion matrix is empty.
        mcc = tf.div_no_nan(
            tp * tn - fp * fn,
            tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn), 0.5))

        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions,
            weights=is_real_example)
        loss = tf.metrics.mean(
            values=per_example_loss, weights=is_real_example)
        return {
            "matthew_corr": (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

    eval_metrics = (metric_fn,
                    [per_example_loss, label_ids, logits, is_real_example])
    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    # PREDICT mode: return probabilities and hard predictions.
    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={
            "probabilities": probabilities,
            "predictions": predictions
        },
        scaffold_fn=scaffold_fn)
  return output_spec
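# ---------------------------------------------------------------------------
# Hedged, self-contained sketch (not part of the original file). The CoLA
# branch above returns `(mcc, tf.group(...))` so that TPUEstimator treats the
# Matthews correlation as a standard (value, update_op) metric pair: running
# the update ops accumulates the confusion-matrix counters, and the value op
# reads them. The toy labels/predictions below are invented purely to
# illustrate that pattern under TensorFlow 1.x; run `mcc_demo()` in a
# standalone script, not inside this file's graph.

import tensorflow as tf


def mcc_demo():
  labels = tf.constant([1, 0, 1, 1, 0, 0], dtype=tf.int32)
  preds = tf.constant([1, 0, 0, 1, 0, 1], dtype=tf.int32)
  weights = tf.ones_like(labels, dtype=tf.float32)

  tp, tp_op = tf.metrics.true_positives(labels, preds, weights=weights)
  tn, tn_op = tf.metrics.true_negatives(labels, preds, weights=weights)
  fp, fp_op = tf.metrics.false_positives(labels, preds, weights=weights)
  fn, fn_op = tf.metrics.false_negatives(labels, preds, weights=weights)

  mcc = tf.div_no_nan(
      tp * tn - fp * fn,
      tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn), 0.5))

  with tf.Session() as sess:
    # Metric counters live in the LOCAL_VARIABLES collection.
    sess.run(tf.local_variables_initializer())
    # One pass of the update ops over the batch, then read the value op.
    sess.run([tp_op, tn_op, fp_op, fn_op])
    # 2 TP, 2 TN, 1 FP, 1 FN -> (4 - 1) / sqrt(81) = 0.333...
    print(sess.run(mcc))
# ---------------------------------------------------------------------------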
def __init__(self, is_training):
    # Training or not
    self.is_training = is_training

    # Placeholders
    self.input_ids = tf.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='input_ids')
    self.input_masks = tf.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='input_masks')
    self.segment_ids = tf.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
    self.label_ids = tf.placeholder(
        tf.float32, shape=[None, hp.num_labels], name='label_ids')

    # Load the ALBERT model.
    self.model = modeling.AlbertModel(
        config=bert_config,
        is_training=self.is_training,
        input_ids=self.input_ids,
        input_mask=self.input_masks,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False)

    # Sequence-level feature map from ALBERT.
    output_layer_init = self.model.get_sequence_output()

    # TextCNN cell on top of the ALBERT outputs.
    output_layer = cell_textcnn(output_layer_init, self.is_training)

    # Hidden size
    hidden_size = output_layer.shape[-1].value

    # Fully connected classification layer.
    with tf.name_scope("Full-connection"):
        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())
        logits = tf.nn.bias_add(
            tf.matmul(output_layer, output_weights, transpose_b=True),
            output_bias)
        # Sigmoid activations for multi-label prediction.
        self.probabilities = tf.nn.sigmoid(logits)

    with tf.variable_scope("Prediction"):
        # Threshold the sigmoid probabilities (not the raw logits) at 0.5.
        zero = tf.zeros_like(logits)
        one = tf.ones_like(logits)
        self.predictions = tf.where(self.probabilities < 0.5, x=zero, y=one)

    with tf.variable_scope("loss"):
        # Summary for TensorBoard.
        if self.is_training:
            self.accuracy = tf.reduce_mean(
                tf.to_float(tf.equal(self.predictions, self.label_ids)))
            tf.summary.scalar('accuracy', self.accuracy)

        # Initialize from the pre-trained ALBERT checkpoint unless a
        # fine-tuned checkpoint already exists in hp.saved_model_path.
        ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
        checkpoint_suffix = ".index"
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path +
                                    checkpoint_suffix):
            print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
            print("%s - Restoring model from checkpoint ~%s" % (
                time_now_string(), ckpt.model_checkpoint_path))
        else:
            print('=' * 10, 'First-time load of the ALBERT model!', '=' * 10)
            tvars = tf.trainable_variables()
            if hp.init_checkpoint:
                (assignment_map, initialized_variable_names) = \
                    modeling.get_assignment_map_from_checkpoint(
                        tvars, hp.init_checkpoint)
                tf.train.init_from_checkpoint(hp.init_checkpoint,
                                              assignment_map)

        # Loss and optimizer.
        if self.is_training:
            # Global step
            self.global_step = tf.Variable(0, name='global_step',
                                           trainable=False)
            # Element-wise sigmoid cross-entropy for multi-label targets.
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.label_ids, logits=logits)
            self.loss = tf.reduce_mean(per_example_loss)

            # Optimizer with linear warmup, as in BERT/ALBERT fine-tuning.
            train_examples = processor.get_train_examples(hp.data_dir)
            num_train_steps = int(
                len(train_examples) / hp.batch_size * hp.num_train_epochs)
            num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
            print('num_train_steps', num_train_steps)
            self.optimizer = optimization.create_optimizer(
                self.loss, hp.learning_rate, num_train_steps,
                num_warmup_steps, hp.use_tpu,
                Global_step=self.global_step)

            # Summary for TensorBoard.
            tf.summary.scalar('loss', self.loss)
            self.merged = tf.summary.merge_all()
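# ---------------------------------------------------------------------------
# Hedged training-loop sketch (not part of the original file). Because the
# graph above is placeholder-based, training is driven by an explicit
# tf.Session and feed_dict rather than an Estimator. `NetworkAlbert` is a
# hypothetical name for the class that owns the `__init__` above, and
# `get_batches` is a hypothetical helper yielding numpy arrays shaped like
# the placeholders; both would need to be replaced with this repository's
# real names.


def train_sketch():
    model = NetworkAlbert(is_training=True)   # hypothetical class name
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(hp.saved_model_path, sess.graph)
        for _ in range(int(hp.num_train_epochs)):
            for ids, masks, segments, labels in get_batches(hp.batch_size):
                feed = {model.input_ids: ids,
                        model.input_masks: masks,
                        model.segment_ids: segments,
                        model.label_ids: labels}
                # `model.optimizer` is the train_op returned by
                # optimization.create_optimizer; running it applies one
                # AdamWeightDecay step (with warmup).
                _, loss, summary, step = sess.run(
                    [model.optimizer, model.loss, model.merged,
                     model.global_step], feed_dict=feed)
                writer.add_summary(summary, step)
                if step % 100 == 0:
                    print('step %d, loss %.4f' % (step, loss))
            saver.save(sess, hp.saved_model_path + '/model',
                       global_step=step)
# ---------------------------------------------------------------------------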