        is_training=training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        history_answer_marker=history_answer_marker,
        use_one_hot_embeddings=False)
(start_logits, end_logits) = cqa_model(bert_representation)

tvars = tf.trainable_variables()
initialized_variable_names = {}
if FLAGS.init_checkpoint:
    (assignment_map,
     initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
         tvars, FLAGS.init_checkpoint)
    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

# compute loss
seq_length = modeling.get_shape_list(input_ids)[1]

def compute_loss(logits, positions):
    one_hot_positions = tf.one_hot(
        positions, depth=seq_length, dtype=tf.float32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    loss = -tf.reduce_mean(
        tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
    return loss

# get the max prob for the predicted start/end position
start_probs = tf.nn.softmax(start_logits, axis=-1)
start_prob = tf.reduce_max(start_probs, axis=-1)
end_probs = tf.nn.softmax(end_logits, axis=-1)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map,
         initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(
                next_sentence_log_probs, axis=-1, output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
            next_sentence_mean_loss = tf.metrics.mean(
                values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
                "next_sentence_accuracy": next_sentence_accuracy,
                "next_sentence_loss": next_sentence_mean_loss,
            }

        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
def _creat_bert(is_training, features, bert_config, use_one_hot_embeddings,
                init_checkpoint):
    global initialized_variable_names
    input_ids = features["input_ids"]
    if "input_extract" in features:
        input_extract = features["input_extract"]
        input1_extract = None
        input2_extract = None
    else:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    q_type = features["q_type"]
    label_ids = features["label_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map,
         initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        print("initializing from checkpoint finished")

    # tf.logging.info("**** Trainable Variables ****")
    # residue = []
    # for var in tvars:
    #     init_string = ""
    #     if var.name in initialized_variable_names:
    #         init_string = ", *INIT_FROM_CKPT*"
    #     else:
    #         residue.append(var)
    #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
    #                     init_string)

    all_layers = model.get_all_encoder_layers()
    layer_indexes = [-1, -2, -3, -4]
    predictions = {
        "input_extract": input_extract,
        "input1_extract": input1_extract,
        "input2_extract": input2_extract,
        "embedding": model.get_embedding_output(),
        "input_mask": input_mask,
        "q_type": q_type,
        "label_ids": label_ids
    }
    for (i, layer_index) in enumerate(layer_indexes):
        predictions["layer_output_%d" % i] = all_layers[layer_index]
    return predictions
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    # note: the labels are read from the "hist_len" feature here
    label_ids = features["hist_len"]
    hist_len = features['hist_len']
    print('hist_len shape:', hist_len)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, hist_len, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assigment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            accuracy = tf.metrics.accuracy(label_ids, predictions)
            loss = tf.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
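# --- A hedged sketch (assumption, not the source implementation) of the
# create_model referenced above, following the standard BERT classifier head
# from the public run_classifier.py. How hist_len is actually used in the
# source is unknown, so it is accepted but ignored here.
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, hist_len, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
    # pooled [CLS] representation -> dropout -> linear classifier
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, logits)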
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    num_gpus = n_gpus
    if is_training:
        optimizer = optimization.create_optimizer_mgpu(
            learning_rate, num_train_steps, num_warmup_steps)
    else:
        num_gpus = 1

    input_ids_list = tf.split(
        features["input_ids"], num_or_size_splits=num_gpus, axis=0)
    input_mask_list = tf.split(
        features["input_mask"], num_or_size_splits=num_gpus, axis=0)
    segment_ids_list = tf.split(
        features["segment_ids"], num_or_size_splits=num_gpus, axis=0)
    masked_lm_positions_list = tf.split(
        features["masked_lm_positions"], num_or_size_splits=num_gpus, axis=0)
    masked_lm_ids_list = tf.split(
        features["masked_lm_ids"], num_or_size_splits=num_gpus, axis=0)
    masked_lm_weights_list = tf.split(
        features["masked_lm_weights"], num_or_size_splits=num_gpus, axis=0)
    next_sentence_labels_list = tf.split(
        features["next_sentence_labels"], num_or_size_splits=num_gpus, axis=0)

    tower_grads = []
    train_perplexity = 0
    for index in range(num_gpus):
        with tf.name_scope('replica_%d' % index):
            with tf.device('/gpu:%d' % index):
                model = modeling.BertModel(
                    config=bert_config,
                    is_training=is_training,
                    input_ids=input_ids_list[index],
                    input_mask=input_mask_list[index],
                    token_type_ids=segment_ids_list[index],
                    use_one_hot_embeddings=use_one_hot_embeddings)

                (masked_lm_loss, masked_lm_example_loss,
                 masked_lm_log_probs) = get_masked_lm_output(
                     bert_config, model.get_sequence_output(),
                     model.get_embedding_table(),
                     masked_lm_positions_list[index],
                     masked_lm_ids_list[index], masked_lm_weights_list[index])

                (next_sentence_loss, next_sentence_example_loss,
                 next_sentence_log_probs) = get_next_sentence_output(
                     bert_config, model.get_pooled_output(),
                     next_sentence_labels_list[index])

                total_loss = masked_lm_loss + next_sentence_loss

                tvars = tf.trainable_variables()
                scaffold_fn = None
                initialized_variable_names = {}
                if init_checkpoint and index == 0:
                    (assignment_map, initialized_variable_names
                     ) = modeling.get_assigment_map_from_checkpoint(
                         tvars, init_checkpoint)
                    # pre-create the Adam slot variables so the checkpoint
                    # assignment map covers them
                    for var in tvars:
                        param_name = var.name[:-2]
                        tf.get_variable(
                            name=param_name + "/adam_m",
                            shape=var.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
                        tf.get_variable(
                            name=param_name + "/adam_v",
                            shape=var.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)

                    tf.logging.info("**** Trainable Variables ****")
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        tf.logging.info("  name = %s, shape = %s%s", var.name,
                                        var.shape, init_string)

                if is_training:
                    # reuse variables across towers
                    tf.get_variable_scope().reuse_variables()
                    loss = total_loss
                    # get gradients
                    grads = optimizer.compute_gradients(
                        loss,
                        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE,
                    )
                    tower_grads.append(grads)
                    # keep track of loss across all GPUs
                    train_perplexity += loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        new_global_step = global_step + 1
        average_grads = average_gradients(tower_grads, None, None)
        average_grads, norm_summary_ops = clip_grads(average_grads, 1.0, True,
                                                     global_step)
        train_op = optimizer.apply_gradients(average_grads)
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=train_perplexity / num_gpus,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(
                next_sentence_log_probs, axis=-1, output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
            next_sentence_mean_loss = tf.metrics.mean(
                values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
                "next_sentence_accuracy": next_sentence_accuracy,
                "next_sentence_loss": next_sentence_mean_loss,
            }

        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs,
            masked_lm_ids_list[0], masked_lm_weights_list[0],
            next_sentence_example_loss, next_sentence_log_probs,
            next_sentence_labels_list[0]
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
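# --- A hedged sketch (assumption, not the source implementation) of the two
# helpers the multi-GPU model_fn above relies on. The real average_gradients
# is called with two extra arguments whose purpose is unknown here, so they
# are accepted and ignored; dense gradients are assumed (tf.stack would
# reject IndexedSlices).
def average_gradients(tower_grads, _unused_a=None, _unused_b=None):
    """Averages the (gradient, variable) lists produced by each GPU tower."""
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars if g is not None]
        mean_grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((mean_grad, grads_and_vars[0][1]))
    return averaged


def clip_grads(grads_and_vars, clip_norm, do_summaries, global_step):
    """Global-norm clipping; returns clipped pairs plus optional summary ops."""
    grads, tvars = zip(*grads_and_vars)
    clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm)
    summary_ops = []
    if do_summaries:
        summary_ops.append(tf.summary.scalar("global_grad_norm", global_norm))
    return list(zip(clipped, tvars)), summary_ops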
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map,
         initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        seq_length = modeling.get_shape_list(input_ids)[1]

        def compute_loss(logits, positions):
            one_hot_positions = tf.one_hot(
                positions, depth=seq_length, dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            loss = -tf.reduce_mean(
                tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
            return loss

        start_positions = features["start_positions"]
        end_positions = features["end_positions"]

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0

        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "unique_ids": unique_ids,
            "start_logits": start_logits,
            "end_logits": end_logits,
        }
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                         (mode))

    return output_spec
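# --- NumPy sanity check (added for illustration) that compute_loss above is
# ordinary cross-entropy over sequence positions: the loss equals the mean
# negative log-probability assigned to the gold start/end position.
import numpy as np

_logits = np.array([[2.0, 0.5, -1.0, 0.1]])
_positions = np.array([0])
_log_probs = _logits - np.log(np.exp(_logits).sum(axis=-1, keepdims=True))
_one_hot = np.eye(_logits.shape[1])[_positions]
_loss = -np.mean(np.sum(_one_hot * _log_probs, axis=-1))
assert np.isclose(_loss, -_log_probs[0, _positions[0]])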
new_bert_representation, new_mtl_input, attention_weights = history_attention_net(
    bert_representation, history_attention_input, mtl_input, slice_mask,
    slice_num)
(start_logits, end_logits) = cqa_model(new_bert_representation)
yesno_logits = yesno_model(new_mtl_input)
followup_logits = followup_model(new_mtl_input)
domain_logits = domain_model(new_mtl_input)

tvars = tf.trainable_variables()
# print(tvars)
initialized_variable_names = {}
if FLAGS.init_checkpoint:
    (assignment_map,
     initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
         tvars, FLAGS.init_checkpoint)
    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)
# print('tvars', tvars)
# print('initialized_variable_names', initialized_variable_names)

# compute loss
seq_length = modeling.get_shape_list(input_ids)[1]

def compute_loss(logits, positions):
    one_hot_positions = tf.one_hot(positions, depth=seq_length,
                                   dtype=tf.float32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    loss = -tf.reduce_mean(
        tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
    return loss
def model_fn(features, labels, mode, params):
    example_id = features["example_id"]
    pos_input_ids = features["pos_input_ids"]
    pos_input_mask = features["pos_input_mask"]
    pos_segment_ids = features["pos_segment_ids"]
    neg_input_ids = features["neg_input_ids"]
    neg_input_mask = features["neg_input_mask"]
    neg_segment_ids = features["neg_segment_ids"]

    is_training = (mode == tfes.estimator.ModeKeys.TRAIN)

    pos_logits = create_model_or_use_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=pos_input_ids,
        input_mask=pos_input_mask,
        segment_ids=pos_segment_ids)
    neg_logits = create_model_or_use_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=neg_input_ids,
        input_mask=neg_input_mask,
        segment_ids=neg_segment_ids)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    initialized_variable_names = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assigment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # tf.logging.info("**** Trainable Variables ****")
    # for var in tvars:
    #     init_string = ""
    #     if var.name[6:] in initialized_variable_names:
    #         init_string = ", *INIT_FROM_CKPT*"
    #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
    #                     init_string)

    if mode == tfes.estimator.ModeKeys.TRAIN:
        # pairwise hinge loss: push pos_logits above neg_logits by `margin`
        original_loss = tf.nn.relu(margin - pos_logits + neg_logits)
        total_loss = tf.reduce_mean(original_loss)
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, False)
        output_spec = tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tfes.estimator.ModeKeys.PREDICT:
        predictions = {
            "pos_logit": pos_logits,
            "neg_logit": neg_logits,
            "example_id": example_id
        }
        output_spec = tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and PREDICT modes are supported: %s" % mode)

    return output_spec
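# --- Toy illustration (added; not from the source) of the pairwise hinge
# loss in the TRAIN branch above: a pair contributes zero loss once the
# positive candidate outscores the negative one by at least `margin`.
import numpy as np

_margin = 1.0
_pos = np.array([2.0, 0.3])
_neg = np.array([0.5, 0.4])
print(np.maximum(0.0, _margin - _pos + _neg))  # [0.  1.1]: only the second pair is penalized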
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assigment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # def metric_fn(per_example_loss, label_ids, logits):
        #     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        #     accuracy = tf.metrics.accuracy(label_ids, predictions)
        #     loss = tf.metrics.mean(per_example_loss)
        #     return {
        #         "eval_accuracy": accuracy,
        #         "eval_loss": loss,
        #     }

        def metric_fn(per_example_loss, label_ids, logits):
            # Display labels and predictions
            concat1 = tf.contrib.metrics.streaming_concat(logits)
            concat2 = tf.contrib.metrics.streaming_concat(label_ids)

            # Compute Pearson correlation
            pearson = tf.contrib.metrics.streaming_pearson_correlation(
                logits, label_ids)

            # Compute MSE
            # mse = tf.metrics.mean(per_example_loss)
            mse = tf.metrics.mean_squared_error(label_ids, logits)

            # Compute Spearman correlation: convert both vectors to ranks via
            # a double top_k, then take the Pearson correlation of the ranks
            size = tf.size(logits)
            indice_of_ranks_pred = tf.nn.top_k(logits, k=size)[1]
            indice_of_ranks_label = tf.nn.top_k(label_ids, k=size)[1]
            rank_pred = tf.nn.top_k(-indice_of_ranks_pred, k=size)[1]
            rank_label = tf.nn.top_k(-indice_of_ranks_label, k=size)[1]
            rank_pred = tf.to_float(rank_pred)
            rank_label = tf.to_float(rank_label)
            spearman = tf.contrib.metrics.streaming_pearson_correlation(
                rank_pred, rank_label)

            return {
                'pred': concat1,
                'label_ids': concat2,
                'pearson': pearson,
                'spearman': spearman,
                'MSE': mse
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
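# --- NumPy illustration (added; assumes scipy is available) of the double
# top_k trick in metric_fn above: sorting twice converts scores into ranks,
# so Pearson correlation computed on the ranks equals Spearman correlation
# (in the absence of ties).
import numpy as np
from scipy.stats import pearsonr, spearmanr

def _to_ranks(v):
    # argsort(argsort(-v)) assigns rank 0 to the largest value, mirroring
    # tf.nn.top_k applied first to the scores and then to the negated indices
    return np.argsort(np.argsort(-v)).astype(float)

_x = np.array([0.2, 1.5, -0.3, 0.9])
_y = np.array([0.1, 2.0, -0.5, 0.4])
assert np.isclose(pearsonr(_to_ranks(_x), _to_ranks(_y))[0],
                  spearmanr(_x, _y).correlation)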
def _creat_bert(is_training, features, bert_config, use_one_hot_embeddings,
                init_checkpoint, layer_num, plus_position):
    global initialized_variable_names
    input_ids = features["input_ids"]
    if "input_extract" in features:
        input_extract = features["input_extract"]
        input1_extract = None
        input2_extract = None
        input3_extract = None
    elif "input3_extract" not in features:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
        input3_extract = None
    else:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
        input3_extract = features["input3_extract"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    q_type = features["q_type"]
    label_ids = features["label_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        output_layer_index=layer_num,
        plus_position=plus_position)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map,
         initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        # print("initializing from checkpoint finished")

    # tf.logging.info("**** Trainable Variables ****")
    # residue = []
    # for var in tvars:
    #     init_string = ""
    #     if var.name in initialized_variable_names:
    #         init_string = ", *INIT_FROM_CKPT*"
    #     else:
    #         residue.append(var)
    #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
    #                     init_string)

    predictions = {
        "input_extract": input_extract,
        "input1_extract": input1_extract,
        "input2_extract": input2_extract,
        "input3_extract": input3_extract,
        "embedding": model.get_embedding_output(),
        "input_mask": input_mask,
        "q_type": q_type,
        "label_ids": label_ids,
        "output_layer": model.get_output_layer(),
        "last_layer": model.get_sequence_output()
    }
    return predictions
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    global initialized_variable_names
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_extract = features["input_extract"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    q_type = features["q_type"]
    label_ids = features["label_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT mode is supported: %s" % (mode))

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assigment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    residue = []
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        else:
            residue.append(var)
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    all_layers = model.get_all_encoder_layers()
    layer_indexes = [-1]
    predictions = {
        "input_extract": input_extract,
        "q_type": q_type,
        "label_ids": label_ids
    }
    for (i, layer_index) in enumerate(layer_indexes):
        predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    num_gpus = n_gpus
    if is_training:
        optimizer = optimization.create_optimizer_mgpu(
            learning_rate, num_train_steps, num_warmup_steps)
    else:
        num_gpus = 1

    input_ids_list = tf.split(
        features["input_ids"], num_or_size_splits=num_gpus, axis=0)
    input_mask_list = tf.split(
        features["input_mask"], num_or_size_splits=num_gpus, axis=0)
    segment_ids_list = tf.split(
        features["segment_ids"], num_or_size_splits=num_gpus, axis=0)
    label_ids_list = tf.split(
        features["label_ids"], num_or_size_splits=num_gpus, axis=0)

    tower_grads = []
    train_perplexity = 0
    for index in range(num_gpus):
        with tf.name_scope('replica_%d' % index):
            with tf.device('/gpu:%d' % index):
                (total_loss, per_example_loss, logits) = create_model(
                    bert_config, is_training, input_ids_list[index],
                    input_mask_list[index], segment_ids_list[index],
                    label_ids_list[index], num_labels, use_one_hot_embeddings)

                tvars = tf.trainable_variables()
                scaffold_fn = None
                initialized_variable_names = {}
                if init_checkpoint:
                    (assignment_map, initialized_variable_names
                     ) = modeling.get_assigment_map_from_checkpoint(
                         tvars, init_checkpoint)
                    # pre-create the Adam slot variables so the checkpoint
                    # assignment map covers them
                    for var in tvars:
                        param_name = var.name[:-2]
                        tf.get_variable(
                            name=param_name + "/adam_m",
                            shape=var.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
                        tf.get_variable(
                            name=param_name + "/adam_v",
                            shape=var.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
                    if use_tpu:

                        def tpu_scaffold():
                            tf.train.init_from_checkpoint(init_checkpoint,
                                                          assignment_map)
                            return tf.train.Scaffold()

                        scaffold_fn = tpu_scaffold
                    else:
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)

                tf.logging.info("**** Trainable Variables ****")
                tf.logging.info('device: %d init' % index)
                if index == 0:
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        tf.logging.info("  name = %s, shape = %s%s", var.name,
                                        var.shape, init_string)

                if is_training:
                    # reuse variables across towers
                    tf.get_variable_scope().reuse_variables()
                    loss = total_loss
                    # get gradients
                    grads = optimizer.compute_gradients(
                        loss,
                        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE,
                    )
                    tower_grads.append(grads)
                    # keep track of loss across all GPUs
                    train_perplexity += loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        new_global_step = global_step + 1
        average_grads = average_gradients(tower_grads, None, None)
        # average_grads, norm_summary_ops = clip_grads(average_grads, 1.0,
        #                                              True, global_step)
        train_op = optimizer.apply_gradients(average_grads)
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=train_perplexity / n_gpus,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions={'predictions': predictions})
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            accuracy = tf.metrics.accuracy(label_ids, predictions)
            loss = tf.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

        eval_metrics = (metric_fn,
                        [per_example_loss, label_ids_list[0], logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN, EVAL and PREDICT modes are supported: %s" %
                         (mode))

    return output_spec