def model_fn(features, labels, mode, params):
    """Build the `tf.estimator.EstimatorSpec` for the classifier.

    Names that are not defined here (`create_model`, `num_labels`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps`, `optimization`)
    are expected to be supplied by an enclosing or module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids"
            and "label_ids".
        labels: unused; labels are read from `features["label_ids"]`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        An `EstimatorSpec` carrying, depending on `mode`: loss + train op
        (TRAIN), loss + "eval_accuracy" metric (EVAL), or a predictions
        dict with keys 'probabilities' and 'labels' (PREDICT).
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    if is_predicting:
        (predicted_labels, log_probs) = create_model(
            is_training, is_predicting, input_ids, input_mask, segment_ids,
            label_ids, num_labels)
        # NOTE(review): the 'probabilities' key is filled with `log_probs`;
        # the key is kept as-is because consumers look it up by name.
        predictions = {
            'probabilities': log_probs,
            'labels': predicted_labels
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL share the same model graph.
    (loss, predicted_labels, log_probs) = create_model(
        is_training, is_predicting, input_ids, input_mask, segment_ids,
        label_ids, num_labels)

    if is_training:
        # The optimizer is only needed (and only built) when training;
        # previously it was also constructed for every EVAL graph.
        train_op = optimization.create_optimizer(
            loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu=False)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    def metric_fn(label_ids, predicted_labels):
        """Return the evaluation metric ops keyed by metric name."""
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        # Prints the (value, update_op) tensor pair at graph-build time.
        print('***** Accuracy {}'.format(accuracy))
        return {"eval_accuracy": accuracy}

    # Metric variables are only created for EVAL, where they are consumed.
    eval_metrics = metric_fn(label_ids, predicted_labels)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metrics)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the sequence-labelling model.

    Names that are not defined here (`bert_config`, `num_labels`,
    `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `create_model`, `modeling`,
    `optimization`, `tf_metrics`) are expected to be supplied by an
    enclosing or module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids"
            and "label_ids".
        labels: unused; labels are read from `features`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    # Must exist even without a checkpoint: the logging loop below reads it.
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        # Initialise from the checkpoint exactly once: inside the scaffold
        # on TPU, directly otherwise.
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Macro precision/recall/F1 over the selected label ids.

            `per_example_loss` is accepted because it is part of the
            tensor list handed to the estimator, but it is not used.
            """
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # 13 and the id list are passed to tf_metrics as the class
            # count and the positive class ids; they are tied to the label
            # map used by the caller.
            precision = tf_metrics.precision(
                label_ids, predictions, 13, [1, 2, 4, 5, 6, 7, 8, 9],
                average="macro")
            recall = tf_metrics.recall(
                label_ids, predictions, 13, [1, 2, 4, 5, 6, 7, 8, 9],
                average="macro")
            f = tf_metrics.f1(
                label_ids, predictions, 13, [1, 2, 4, 5, 6, 7, 8, 9],
                average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Return the `TPUEstimatorSpec` for TRAIN, EVAL or PREDICT.

    Configuration such as `bert_config`, `num_labels`, `init_checkpoint`,
    `use_tpu` and the optimizer settings is read from names defined outside
    this function.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Built for parity with the reference implementation; the active
    # metric_fn below does not consume it.
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             trainable_vars, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        from_ckpt = variable.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:
        # The evaluation metrics are task-specific and must be adapted
        # when the label set changes.
        def metric_fn(per_example_loss, label_ids, logits):
            """Macro precision / recall / F1 over the classes of interest."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # If the label set changes these numbers must change too:
            # 10 is the total number of classes and 1-6 are the classes
            # that count (the B/I/E tags). See the tf_metrics functions.
            num_classes = 10
            positive_ids = [1, 2, 3, 4, 5, 6]
            precision = tf_metrics.precision(
                label_ids, predictions, num_classes, positive_ids,
                average="macro")
            recall = tf_metrics.recall(
                label_ids, predictions, num_classes, positive_ids,
                average="macro")
            f = tf_metrics.f1(
                label_ids, predictions, num_classes, positive_ids,
                average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [per_example_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Return the `tf.estimator.EstimatorSpec` for the requested mode.

    Model hyper-parameters are read from names defined outside this
    function (`bert_config`, `num_labels`, `args`, `init_checkpoint`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps`).
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model from the configuration: input_ids are the index
    # representation of the samples, label_ids that of the labels.
    # Returned: total loss, scores, `trans`, predicted class ids.
    total_loss, logits, trans, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, False, args.dropout_rate, args.lstm_size,
        args.cell, args.num_layers)

    # All trainable variables.
    trainable_vars = tf.trainable_variables()

    # Load the pre-trained BERT weights through the assignment map.
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        logged_tensors = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            logged_tensors, every_n_iter=args.save_summary_steps)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])

    if mode == tf.estimator.ModeKeys.EVAL:
        # Adapted for NER.
        def metric_fn(label_ids, pred_ids):
            """Report the mean squared error between label and predicted ids."""
            mse = tf.metrics.mean_squared_error(labels=label_ids,
                                                predictions=pred_ids)
            return {"eval_loss": mse}

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=metric_fn(label_ids, pred_ids))

    return tf.estimator.EstimatorSpec(mode=mode, predictions=pred_ids)
def model_fn(features, labels, mode, params):
    """Build the `EstimatorSpec` for the joint domain/intent/slot model.

    Names that are not defined here (`bert_config`, `num_domain`,
    `num_intent`, `num_slot`, `use_one_hot_embeddings`, `domain_w`,
    `intent_w`, `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu`, `create_model`, `modeling`,
    `optimization`, `np`) are expected to be supplied by an enclosing or
    module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids",
            "domain_id", "intent_id", "slot_id" and optionally
            "is_real_example".
        labels: unused; labels are read from `features`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        A `tf.estimator.EstimatorSpec`. TRAIN carries loss and train op;
        EVAL carries loss and the three prediction tensors; PREDICT carries
        the prediction tensors only.
    """
    tf.logging.info("***features***")
    input_ids = features['input_ids']
    input_mask = features['input_mask']
    segment_ids = features['segment_ids']
    domain_id = features['domain_id']
    intent_id = features['intent_id']
    slot_id = features['slot_id']

    # NOTE(review): is_real_example is built but not consumed anywhere in
    # this function - confirm whether it should weight the losses.
    is_real_example = None
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(domain_id), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    domain_loss, intent_loss, slot_loss, domain_pred, intent_pred, slot_pred = \
        create_model(bert_config, is_training, input_ids, input_mask,
                     segment_ids, domain_id, intent_id, slot_id, num_domain,
                     num_intent, num_slot, use_one_hot_embeddings,
                     np.array(domain_w, dtype=np.float32),
                     np.array(intent_w, dtype=np.float32))

    tvars = tf.trainable_variables()
    # Load the pre-trained weights.
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    predictions = {
        "domain_pred": domain_pred,
        "intent_pred": intent_pred,
        "slot_pred": slot_pred
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # The slot loss is weighted twice as heavily as the other two tasks.
    total_loss = domain_loss + intent_loss + 2 * slot_loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          train_op=train_op)

    # EVAL: EstimatorSpec requires a loss in this mode; previously EVAL fell
    # into the prediction-only branch and construction raised "Missing loss".
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=total_loss,
                                      predictions=predictions)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the sequence-labelling model.

    Names that are not defined here (`bert_config`, `num_labels`,
    `valid_labels`, `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps`, `create_model`,
    `modeling`, `optimization`, `tf_metrics`) are expected to be supplied by
    an enclosing or module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids"
            and "label_ids".
        labels: unused; labels are read from `features`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model on top of the BERT interface.
    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    # All variables that will be trained.
    tvars = tf.trainable_variables()
    # Must exist even without a checkpoint: the logging loop below reads it.
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        # Only the variables matched by the assignment map are pre-loaded.
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        # Initialise from the checkpoint exactly once: inside the scaffold
        # on TPU, directly otherwise.
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            # Marks variables whose values come from the checkpoint.
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, predicts, valid_labels):
            """Macro precision/recall/F1 of `predicts` against `label_ids`."""
            precision = tf_metrics.precision(
                label_ids, predicts, num_labels, valid_labels,
                average="macro")
            recall = tf_metrics.recall(
                label_ids, predicts, num_labels, valid_labels,
                average="macro")
            f = tf_metrics.f1(
                label_ids, predicts, num_labels, valid_labels,
                average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, predicts, valid_labels])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Return the `tf.estimator.EstimatorSpec` for the requested mode.

    Settings such as `bert_config`, `num_labels`, `init_checkpoint` and the
    optimizer schedule are read from names defined outside this function.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model from the configuration: input_ids are the index
    # representation of the samples, label_ids that of the labels.
    total_loss, logits, trans, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, False)

    trainable_vars = tf.trainable_variables()

    # Load the pre-trained BERT weights.
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        logged_tensors = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(logged_tensors,
                                                  every_n_iter=100)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])

    if mode == tf.estimator.ModeKeys.EVAL:
        # Adapted for NER.
        def metric_fn(label_ids, pred_ids):
            """Macro precision / recall / F1 over the selected class ids."""
            num_classes = 11
            positive_ids = [2, 3, 4, 5, 6, 7]
            precision = tf_metrics.precision(
                label_ids, pred_ids, num_classes, positive_ids,
                average="macro")
            recall = tf_metrics.recall(
                label_ids, pred_ids, num_classes, positive_ids,
                average="macro")
            f = tf_metrics.f1(
                label_ids, pred_ids, num_classes, positive_ids,
                average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=metric_fn(label_ids, pred_ids))

    return tf.estimator.EstimatorSpec(mode=mode, predictions=pred_ids)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the `tf.estimator.EstimatorSpec` for the per-class AUC model.

    Names that are not defined here (`self`, `bert_config`, `num_labels`,
    `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `modeling`, `optimization`) are
    expected to be supplied by an enclosing or module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids",
            "label_ids" and optionally "is_real_example".
        labels: unused; labels are read from `features`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits,
     probabilities) = self.create_model(bert_config, is_training, input_ids,
                                        input_mask, segment_ids, label_ids,
                                        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        # The loop used to compute init_string and then discard it, leaving
        # the header above with nothing under it.
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    # tf.estimator.EstimatorSpec expects a tf.train.Scaffold instance (or
    # None), not a scaffold-building function, so resolve it here.
    scaffold = scaffold_fn() if scaffold_fn is not None else None

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                 loss=total_loss,
                                                 train_op=train_op,
                                                 scaffold=scaffold)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, probabilities,
                      is_real_example):
            """Return one AUC metric per class plus the mean loss.

            `is_real_example` is accepted but not used as a weight.
            """
            logits_split = tf.split(probabilities, num_labels, axis=-1)
            label_ids_split = tf.split(label_ids, num_labels, axis=-1)
            # One AUC per class, keyed by the class index as a string.
            eval_dict = {}
            for j, logits in enumerate(logits_split):
                label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)
                current_auc, update_op_auc = tf.metrics.auc(label_id_, logits)
                eval_dict[str(j)] = (current_auc, update_op_auc)
            eval_dict['eval_loss'] = tf.metrics.mean(values=per_example_loss)
            return eval_dict

        eval_metrics = metric_fn(per_example_loss, label_ids, probabilities,
                                 is_real_example)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metrics,
            scaffold=scaffold)
    else:
        print("mode:", mode, "probabilities:", probabilities)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold=scaffold)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (ordering task, optional masked LM).

    Names that are not defined here (`bert_config`, `add_masking`,
    `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `FLAGS`, `modeling`,
    `model_builder`, `optimization`, `contrib_tpu`) are expected to be
    supplied by an enclosing or module scope.

    Args:
        features: dict providing "input_ids", "input_mask", "segment_ids",
            "order", "decoder_input", "sep_positions" and, when
            `add_masking` is set, "mask_indices", "target_token_ids" and
            "target_token_weights".
        labels: unused; labels are read from `features["order"]`.
        mode: one of `tf.estimator.ModeKeys`.
        params: unused.

    Returns:
        A `contrib_tpu.TPUEstimatorSpec` for the requested mode.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["order"]
    decoder_inputs = features["decoder_input"]
    sep_positions = features["sep_positions"]

    if add_masking:
        masked_lm_positions = features["mask_indices"]
        masked_lm_ids = features["target_token_ids"]
        masked_lm_weights = features["target_token_weights"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (order_loss, per_example_loss, logits,
     probabilities) = model_builder.create_model(
         model, label_ids, decoder_inputs,
         FLAGS.train_batch_size if is_training else FLAGS.eval_batch_size,
         FLAGS.order_model_type, sep_positions)

    if add_masking:
        (masked_lm_loss, _, _) = model_builder.get_masked_lm_output(
            bert_config, model.get_sequence_output(),
            model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
            masked_lm_weights)
        total_loss = order_loss + masked_lm_loss
    else:
        total_loss = order_loss

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)

        # This probably shouldn't be done after the first time.
        # Iterate over a snapshot of the keys: deleting from the mapping
        # while iterating it directly raises RuntimeError on Python 3.
        # NOTE(review): initialized_variable_names is left untouched, so the
        # dropped variables are still logged as *INIT_FROM_CKPT* below.
        for key in list(assignment_map):
            if "bert/embeddings/token_type_embeddings" in key:
                del assignment_map[key]

        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy and mean loss metric ops."""
            # Logits are regrouped as (-1, 5, 5) before the argmax; the 5s
            # are fixed here and must match the model's output layout.
            packed_logits = tf.reshape(
                tf.convert_to_tensor(logits, dtype=tf.float32), (-1, 5, 5))
            predictions = tf.argmax(packed_logits,
                                    axis=-1,
                                    output_type=tf.int32)
            accuracy = tf.metrics.accuracy(labels=label_ids,
                                           predictions=predictions)
            loss = tf.metrics.mean(values=per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Return the `TPUEstimatorSpec` for masked-LM + next-sentence training.

    Only TRAIN and EVAL are supported; any other mode raises ValueError.
    Configuration (`bert_config`, `init_checkpoint`, `use_tpu`, optimizer
    schedule, ...) is read from names defined outside this function.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    masked_lm_outputs = get_masked_lm_output(
        bert_config, model.get_sequence_output(), model.get_embedding_table(),
        masked_lm_positions, masked_lm_ids, masked_lm_weights)
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = masked_lm_outputs

    next_sentence_outputs = get_next_sentence_output(
        bert_config, model.get_pooled_output(), next_sentence_labels)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = next_sentence_outputs

    total_loss = masked_lm_loss + next_sentence_loss

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             trainable_vars, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        from_ckpt = variable.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.EVAL:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights, next_sentence_example_loss,
                  next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        # Masked-LM metrics: flatten everything, then weight by the mask
        # weights.
        flat_lm_log_probs = tf.reshape(
            masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
        lm_predictions = tf.argmax(flat_lm_log_probs,
                                   axis=-1,
                                   output_type=tf.int32)
        flat_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        flat_lm_ids = tf.reshape(masked_lm_ids, [-1])
        flat_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=flat_lm_ids,
            predictions=lm_predictions,
            weights=flat_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(values=flat_lm_example_loss,
                                              weights=flat_lm_weights)

        # Next-sentence metrics.
        flat_ns_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        ns_predictions = tf.argmax(flat_ns_log_probs,
                                   axis=-1,
                                   output_type=tf.int32)
        flat_ns_labels = tf.reshape(next_sentence_labels, [-1])
        next_sentence_accuracy = tf.metrics.accuracy(
            labels=flat_ns_labels, predictions=ns_predictions)
        next_sentence_mean_loss = tf.metrics.mean(
            values=next_sentence_example_loss)

        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
            "next_sentence_accuracy": next_sentence_accuracy,
            "next_sentence_loss": next_sentence_mean_loss,
        }

    metric_inputs = [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, next_sentence_example_loss,
        next_sentence_log_probs, next_sentence_labels
    ]
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, metric_inputs),
        scaffold_fn=scaffold_fn)
def train():
    """Train the chat model using settings from `params.ini`.

    Reads the `chat_model` section of `params.ini`, builds the input
    pipeline, model and optimizer in a fresh graph, and runs training inside
    a `tf.train.MonitoredTrainingSession` that checkpoints to `log_dir`.
    Every 100 local steps it prints the training loss and up to four decoded
    question / ground-truth / prediction samples from the current batch.

    Exceptions raised inside the loop (including exhaustion of the input
    iterator) are printed and end training; they are not re-raised.
    """
    parser = configparser.ConfigParser()
    parser.read('params.ini')
    section = 'chat_model'

    max_x_len = parser.getint(section, 'max_x_len')
    max_y_len = parser.getint(section, 'max_y_len')
    decode_max_len = parser.getint(section, 'decode_max_len')
    vocab_file = parser.get(section, 'vocab_file')
    config_file = parser.get(section, 'config_file')
    ckpt_file = parser.get(section, 'ckpt_file')
    beam_width = parser.getint(section, 'beam_width')
    batch_size = parser.getint(section, 'batch_size')
    lr = parser.getfloat(section, 'lr')
    dropout_rate = parser.getfloat(section, 'dropout_rate')
    train_nums = parser.getint(section, 'train_data_size')
    warmup_proportion = parser.getfloat(section, 'warmup_proportion')
    epochs = parser.getint(section, 'epochs')
    length_penalty_weight = parser.getfloat(section, 'length_penalty_weight')
    coverage_penalty_weight = parser.getfloat(section,
                                              'coverage_penalty_weight')
    log_dir = parser.get(section, 'log_dir')
    data_dir = parser.get(section, 'data_dir')
    train_file = os.path.join(data_dir, parser.get(section, 'train_file'))

    tokenizer = tokenization.FullTokenizer(vocab_file)
    chatmodel_config = ChatModelConfig(
        max_x_len, max_y_len, decode_max_len, tokenizer.vocab, config_file,
        dropout_rate, ckpt_file, beam_width, coverage_penalty_weight,
        length_penalty_weight)

    os.makedirs(log_dir, exist_ok=True)

    def decode(token_ids):
        """Join the tokens for `token_ids` into one printable string."""
        return ''.join(tokenizer.convert_ids_to_tokens(token_ids))

    max_samples = 4  # number of examples echoed at each report
    graph = tf.Graph()
    step = 0
    with graph.as_default():
        input_fn = file_based_input_fn_builder(train_file, max_x_len,
                                               max_y_len, True, True)
        ds = input_fn(batch_size)
        iterator = ds.make_one_shot_iterator()
        batch_inputs = iterator.get_next()

        chat_model = ChatModel(chatmodel_config)
        loss, _, predictions, _ = chat_model.loss()

        num_train_steps = int(train_nums / batch_size * epochs)
        num_warmup_steps = int(num_train_steps * warmup_proportion)
        train_op = optimization.create_optimizer(
            loss, lr, num_train_steps, num_warmup_steps, False)

        # No manual tf.Session()/global_variables_initializer() here: that
        # session was never closed, and MonitoredTrainingSession creates and
        # initialises its own session.
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=log_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=num_train_steps),
                    tf.train.NanTensorHook(loss)
                ],
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:
            try:
                while not sess.should_stop():
                    # Auxiliary fetches use the raw session so the hooks
                    # (which may fetch tensors that need the feed dict) are
                    # not triggered for them.
                    raw_sess = sess._tf_sess()
                    trainDatas = raw_sess.run(batch_inputs)
                    feed_dict = make_feed_dict(chat_model, trainDatas, 0.1)
                    # The training step goes through the monitored session
                    # so StopAtStepHook, NanTensorHook and the checkpoint
                    # hooks actually run.
                    train_loss, _ = sess.run([loss, train_op],
                                             feed_dict=feed_dict)
                    if step % 100 == 0:
                        print('====> step:{:06d}|{}\t[train loss:{:.3f}]'.format(
                            step, num_train_steps, train_loss))
                        eval_val = sess._tf_sess().run(predictions, feed_dict)
                        # Never index past the batch: a batch smaller than
                        # four used to raise IndexError here, which the
                        # broad except below turned into a silent stop.
                        shown = min(max_samples, len(trainDatas['x']),
                                    len(eval_val))
                        for i in range(shown):
                            print('question:\t', decode(trainDatas['x'][i]))
                            print('groud truth:\t', decode(trainDatas['y'][i]))
                        for i in range(shown):
                            print('predictions:\t', decode(eval_val[i]))
                        print(eval_val.shape)
                        print('\n')
                    step += 1
            except KeyboardInterrupt as e:
                print(e)
            except Exception as e:
                # Best-effort: report and fall through so the session can
                # shut down cleanly.
                print(e)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (masked-LM-only variant).

    Builds a `modeling.BertModel`, attaches the masked-LM head via
    `get_masked_lm_output` and uses the masked-LM loss as the total loss
    (the next-sentence head is not built in this variant).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "masked_lm_positions", "masked_lm_ids" and
            "masked_lm_weights".
        labels: unused.
        mode: a `tf.estimator.ModeKeys` value; only TRAIN and EVAL are
            supported.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or EVAL.

    Raises:
        ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    # The next-sentence head is disabled here, so the masked-LM loss is the
    # whole training objective.
    total_loss = masked_lm_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            # On TPU the checkpoint restore is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights):
        """Computes the streaming masked-LM loss and accuracy for EVAL."""
        masked_lm_log_probs = tf.reshape(
            masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)
        # Next-sentence metrics are omitted because that head is not built.
        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
        }

    # Since the tf.metrics are rolling averages, it does not make
    # sense to have them for training. The method below instead
    # calculates the per-batch accuracy, which is used for training.
    def metric_fn_train_batch(masked_lm_log_probs, masked_lm_ids,
                              masked_lm_weights):
        """Returns the weighted masked-LM accuracy of the current batch."""
        masked_lm_log_probs = tf.reshape(
            masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])

        if masked_lm_ids.dtype != masked_lm_predictions.dtype:
            masked_lm_predictions = tf.cast(masked_lm_predictions,
                                            masked_lm_ids.dtype)
        is_correct = tf.to_float(
            math_ops.equal(masked_lm_predictions, masked_lm_ids))
        # The weights were just produced by tf.reshape, so they are always a
        # tensor here; the former `is None` fallback could never run.
        masked_lm_weights = weights_broadcast_ops.broadcast_weights(
            tf.to_float(masked_lm_weights), is_correct)
        is_correct = tf.math.multiply(is_correct, masked_lm_weights)
        count = tf.math.reduce_sum(masked_lm_weights)
        # NOTE(review): if every weight in the batch is zero this divides by
        # zero and logs NaN -- confirm whether that can happen in practice.
        acc_value = tf.math.reduce_sum(is_correct) / count
        return dict(masked_lm_accuracy_train_batch=acc_value)

    # train hooks
    tensors_logging_hook_train = dict(
        batch_loss=total_loss,
        step=tf.train.get_global_step(),
    )
    metrics_train_batch = metric_fn_train_batch(
        masked_lm_log_probs, masked_lm_ids, masked_lm_weights)
    tensors_logging_hook_train.update(metrics_train_batch)
    logging_hook_train = tf.train.LoggingTensorHook(
        tensors_logging_hook_train, every_n_iter=10)

    # eval hooks
    tensors_logging_hook_eval = dict(
        eval_batch_loss=total_loss,
        step=tf.train.get_global_step(),
    )
    logging_hook_eval = tf.train.LoggingTensorHook(
        tensors_logging_hook_eval, every_n_iter=10)
    eval_hooks = [logging_hook_eval]
    if eval_saving_path:
        # Optionally dump the raw evaluation tensors on every iteration.
        eval_hooks.append(
            SavingTensorHook(
                dict(
                    masked_lm_example_loss=masked_lm_example_loss,
                    masked_lm_log_probs=masked_lm_log_probs,
                    masked_lm_ids=masked_lm_ids,
                    masked_lm_weights=masked_lm_weights,
                    total_loss=total_loss,
                    step=tf.train.get_global_step(),
                ),
                eval_saving_path,
                every_n_iter=1))

    # tensorboard summaries for train metrics
    for metric_name, metric_value in metrics_train_batch.items():
        tf.summary.scalar(metric_name, metric_value)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook_train],
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            evaluation_hooks=eval_hooks,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % mode)

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (classification).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids" and "label_ids".
        labels: unused; the labels are read from `features`.
        mode: a `tf.estimator.ModeKeys` value; only TRAIN and EVAL are
            supported.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or EVAL.

    Raises:
        ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    # Must be bound even when no checkpoint is given: the logging loop below
    # reads it unconditionally and would otherwise raise UnboundLocalError.
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            # On TPU the checkpoint restore is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Returns streaming accuracy and mean loss for evaluation."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            accuracy = tf.metrics.accuracy(label_ids, predictions)
            loss = tf.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (token labelling + predicate head).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "token_label_ids", "predicate_matrix_ids" and,
            when present, "is_real_example".
        labels: unused; the labels are read from `features`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec`: loss and train op for TRAIN,
        loss and eval metrics for EVAL, predictions for any other mode.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    token_label_ids = features["token_label_ids"]
    predicate_matrix_ids = features["predicate_matrix_ids"]

    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        # TO DO (carried over from the original): fallback weights take the
        # shape of token_label_ids.
        is_real_example = tf.ones(tf.shape(token_label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model_outputs = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        token_label_ids, predicate_matrix_ids, num_token_labels,
        num_predicate_labels, use_one_hot_embeddings)
    (total_loss, predicate_head_select_loss, predicate_head_probabilities,
     predicate_head_predictions, token_label_loss,
     token_label_per_example_loss, token_label_logits,
     token_label_predictions) = model_outputs

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = (
            modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                        init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        restored = variable.name in initialized_variable_names
        suffix = ", *INIT_FROM_CKPT*" if restored else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(predicate_head_select_loss, token_label_per_example_loss,
                      token_label_ids, token_label_logits, is_real_example):
            """Builds token-label P/R/F (macro and micro) and mean losses."""
            predicted_ids = tf.argmax(
                token_label_logits, axis=-1, output_type=tf.int32)
            # Skip the first four label ids
            # ["[Padding]", "[##WordPiece]", "[CLS]", "[SEP]"] ...
            candidate_indices = list(range(num_token_labels))[4:]
            # ... and the last one: do not care "O".
            pos_indices_list = candidate_indices[:-1]

            def scored(metric, average):
                return metric(token_label_ids, predicted_ids,
                              num_token_labels, pos_indices_list,
                              average=average)

            precision_macro = scored(tf_metrics.precision, "macro")
            recall_macro = scored(tf_metrics.recall, "macro")
            f_macro = scored(tf_metrics.f1, "macro")
            precision_micro = scored(tf_metrics.precision, "micro")
            recall_micro = scored(tf_metrics.recall, "micro")
            f_micro = scored(tf_metrics.f1, "micro")

            mean_token_label_loss = tf.metrics.mean(
                values=token_label_per_example_loss, weights=is_real_example)
            mean_predicate_loss = tf.metrics.mean(
                values=predicate_head_select_loss)

            return {
                "predicate_head_select_loss": mean_predicate_loss,
                "eval_token_label_precision(macro)": precision_macro,
                "eval_token_label_recall(macro)": recall_macro,
                "eval_token_label_f(macro)": f_macro,
                "eval_token_label_precision(micro)": precision_micro,
                "eval_token_label_recall(micro)": recall_micro,
                "eval_token_label_f(micro)": f_micro,
                "eval_token_label_loss": mean_token_label_loss,
            }

        metric_inputs = [
            predicate_head_select_loss, token_label_per_example_loss,
            token_label_ids, token_label_logits, is_real_example
        ]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    # Any other mode (e.g. PREDICT) only exposes the model outputs.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={
            "predicate_head_probabilities": predicate_head_probabilities,
            "predicate_head_predictions": predicate_head_predictions,
            "token_label_predictions": token_label_predictions,
        },
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (MLM with optional NSP head).

    The next-sentence head is built only when
    `FLAGS.use_next_sentence_prediction` is set; otherwise the total loss is
    the masked-LM loss alone and only masked-LM metrics are reported.

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "masked_lm_positions", "masked_lm_ids",
            "masked_lm_weights" and "next_sentence_labels".
        labels: unused.
        mode: a `tf.estimator.ModeKeys` value; only TRAIN and EVAL are
            supported.
        params: unused.

    Returns:
        A TPUEstimatorSpec for TRAIN (with a summary `host_call`) or EVAL.

    Raises:
        ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    use_nsp = FLAGS.use_next_sentence_prediction
    if use_nsp:
        sample_weights = None
        if FLAGS.no_nsp_while_masking:
            # NOTE(review): `>= 0.0` holds for every example whenever the
            # weights are non-negative, so this mask looks like it never
            # excludes anything -- confirm the intended comparison.
            sample_weights = tf.cast(
                tf.math.greater_equal(
                    tf.reduce_sum(masked_lm_weights, axis=1), 0.0),
                tf.float32)
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels,
             sample_weights)
        # Compute total weighted loss:
        # if mlm_loss_weight=1, this amounts to summing up the losses.
        total_loss = (bert_config.mlm_loss_weight * masked_lm_loss +
                      next_sentence_loss) / (
                          1 + bert_config.mlm_loss_weight) * 2
        next_sentence_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        next_sentence_predictions = tf.argmax(
            next_sentence_log_probs, axis=-1, output_type=tf.int32)
        next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
        next_sentence_accuracy = tf.metrics.accuracy(
            labels=next_sentence_labels, predictions=next_sentence_predictions)
    else:
        total_loss = masked_lm_loss

    masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                     [-1, masked_lm_log_probs.shape[-1]])
    masked_lm_predictions = tf.argmax(
        masked_lm_log_probs, axis=-1, output_type=tf.int32)
    masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
    masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
    masked_lm_accuracy = tf.metrics.accuracy(
        labels=masked_lm_ids,
        predictions=masked_lm_predictions,
        weights=masked_lm_weights)

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.logging.info("**** Assignment map **** %s" % assignment_map)
        for x in assignment_map:
            tf.logging.info(x)
        if use_tpu:

            # On TPU the checkpoint restore is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)

        # Host function for saving summaries.
        def _host_fn(**kwargs):
            global_step = kwargs.pop("global_step")[0]
            with tf.compat.v2.summary.create_file_writer(
                    os.path.join(FLAGS.output_dir, "train")).as_default():
                with tf.compat.v2.summary.record_summaries_every_n_global_steps(
                        FLAGS.steps_per_summary, global_step):
                    for name, tensor in kwargs.items():
                        tf.compat.v2.summary.scalar(
                            name, tf.reduce_mean(tensor), step=global_step)
                    return tf.summary.all_v2_summary_ops()

        global_step = tf.train.get_or_create_global_step()
        # NOTE(review): tf.metrics.accuracy returns a (value, update_op)
        # pair, and that pair is what gets passed to tf.expand_dims for the
        # accuracy entries below -- confirm this is the intended input.
        if use_nsp:
            host_inputs = {
                "global_step": tf.expand_dims(global_step, 0),
                "loss/mlm_loss": tf.expand_dims(masked_lm_loss, 0),
                "loss/cls_loss": tf.expand_dims(next_sentence_loss, 0),
                "loss/total_loss": tf.expand_dims(total_loss, 0),
                "accuracy/mlm_accuracy": tf.expand_dims(masked_lm_accuracy, 0),
                "accuracy/cls_accuracy": tf.expand_dims(
                    next_sentence_accuracy, 0),
            }
        else:
            host_inputs = {
                "global_step": tf.expand_dims(global_step, 0),
                "loss/mlm_loss": tf.expand_dims(masked_lm_loss, 0),
                "loss/total_loss": tf.expand_dims(total_loss, 0),
                "accuracy/mlm_accuracy": tf.expand_dims(masked_lm_accuracy, 0),
            }

        host_call = (_host_fn, host_inputs)

        output_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=host_call,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def mlm_metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights):
            """Computes the masked-LM loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)
            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
            }

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model (MLM and NSP)."""
            metrics = mlm_metric_fn(masked_lm_example_loss,
                                    masked_lm_log_probs, masked_lm_ids,
                                    masked_lm_weights)
            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(
                next_sentence_log_probs, axis=-1, output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            metrics["next_sentence_accuracy"] = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
            metrics["next_sentence_loss"] = tf.metrics.mean(
                values=next_sentence_example_loss)
            return metrics

        mlm_inputs = [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights
        ]
        if use_nsp:
            eval_metrics = (metric_fn, mlm_inputs + [
                next_sentence_example_loss, next_sentence_log_probs,
                next_sentence_labels
            ])
        else:
            # The next-sentence tensors are never created when NSP is
            # disabled, so only the masked-LM metrics can be evaluated.
            eval_metrics = (mlm_metric_fn, mlm_inputs)

        output_spec = contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (classification with example weights).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "label_ids" and, when present, "is_real_example".
        labels: unused; the labels are read from `features`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec`: loss and train op for TRAIN,
        loss and eval metrics for EVAL, class probabilities otherwise.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Without an explicit flag, every example is weighted 1.0.
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = (
            modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                        init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        restored = variable.name in initialized_variable_names
        suffix = ", *INIT_FROM_CKPT*" if restored else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            """Returns weighted streaming accuracy and mean loss."""
            predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predicted_ids,
                    weights=is_real_example),
                "eval_loss": tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example),
            }

        metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    # Any other mode (e.g. PREDICT) only exposes the class probabilities.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """The `model_fn` for a plain `tf.estimator.Estimator` (NER tagging).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids" and "label_ids".
        labels: unused; the labels are read from `features`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.estimator.EstimatorSpec`: loss, train op and a logging hook for
        TRAIN, loss and eval metrics for EVAL, `pred_ids` otherwise.
    """
    # 1. Extract the contents of `features`.
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)

    # 2. create_model
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    total_loss, logits, trans, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, False, args.dropout_rate, args.lstm_size,
        args.cell, args.num_layers)

    tvars = tf.trainable_variables()
    # Must be bound even when no checkpoint is given: the logging loop below
    # reads it and would otherwise raise UnboundLocalError.
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    # 3. Return the EstimatorSpec.
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        # hook_dict records the loss and the global step for logging.
        hook_dict = {}
        hook_dict['loss'] = total_loss
        hook_dict['global_steps'] = tf.train.get_or_create_global_step()
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.save_summary_steps)
        # Required arguments: loss, train_op.
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # OPEN ISSUE (from the original author): the evaluation metrics.
        def metric_fn(label_ids, pred_ids):
            """Returns precision/recall/F1 over the selected label ids."""
            # OPEN ISSUE (from the original author): do these indices match
            # the label indices of NERProcessor?
            indices = [2, 3, 4]
            weight = tf.sequence_mask(args.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                             indices, weight)
            recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                       indices, weight)
            f1 = tf_metrics.f1(label_ids, pred_ids, num_labels, indices,
                               weight)
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f1
            }

        eval_metrics = metric_fn(label_ids, pred_ids)
        # Required argument: loss.
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metrics)
    else:
        # Required argument: predictions.
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=pred_ids)
    return output_spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator (NER tagging).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids" and "label_ids".
        labels: unused; the labels are read from `features`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec`: loss and train op for TRAIN,
        loss and eval metrics for EVAL, `pred_ids` otherwise.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    print('shape of input_ids', input_ids.shape)
    # label_mask = features["label_mask"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model from the arguments: input_ids is the id representation
    # of the input samples, label_ids the id representation of the labels.
    (total_loss, logits, trans, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Load the BERT checkpoint. The restore is registered exactly once:
    # through the scaffold on TPU, directly otherwise. (An extra
    # unconditional init_from_checkpoint call used to precede this branch.)
    if init_checkpoint:
        (assignment_map, _) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Only the header is logged; the per-variable listing is disabled.
    tf.logging.info("**** Trainable Variables ****")

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        # scaffold_fn is the hook that initialises our model from the BERT
        # parameters.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        # Modified for NER.
        def metric_fn(label_ids, pred_ids):
            """Returns precision/recall/F1 over the selected label ids."""
            # `indices` tells the metrics which labels to evaluate; it
            # corresponds to label_list.
            indices = [2, 3, 4, 5, 6, 7]
            weight = tf.sequence_mask(FLAGS.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                             indices, weight)
            recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                       indices, weight)
            f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, weight)
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, pred_ids])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn
        )
    return output_spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator (variable-misuse model).

    Args:
        features: dict of input tensors. Reads "input_ids", "input_mask",
            "segment_ids", "candidate_mask", "error_location_mask",
            "target_mask" and, when present, "is_real_example".
        labels: unused; task labels travel inside `features`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `contrib.tpu.TPUEstimatorSpec`: loss and train op for TRAIN, loss
        and eval metrics for EVAL, probabilities for any other mode.
    """
    # The signature is fixed by the estimator interface; neither `labels`
    # nor `params` is used here, so both are dropped immediately.
    del labels, params

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    candidate_mask = features["candidate_mask"]
    error_location_mask = features["error_location_mask"]
    target_mask = features["target_mask"]
    sequence_length = tf.shape(input_ids)[1]

    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(input_ids)[0], dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model_outputs = create_original_varmisuse_model(
        bert_config=bert_config,
        is_training=is_training,
        enable_sequence_masking=enable_sequence_masking,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        candidate_mask=candidate_mask,
        target_mask=target_mask,
        error_location_mask=error_location_mask,
        use_one_hot_embeddings=use_one_hot_embeddings)
    (total_loss, per_example_loss, _, probabilities) = model_outputs

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = (
            modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                        init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        restored = variable.name in initialized_variable_names
        suffix = ", *INIT_FROM_CKPT*" if restored else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, probabilities, error_location_mask,
                      target_mask, is_real_example):
            """Metric function."""
            # An example counts as buggy when position 0 of its
            # error-location mask is 0.
            buggy_mask = tf.equal(error_location_mask[:, 0], 0)
            non_buggy_mask = tf.logical_not(buggy_mask)

            location_probs, repair_probs = tf.unstack(probabilities, axis=2)
            predicted_locations = tf.argmax(
                location_probs, axis=1, output_type=tf.int32)
            predicted_repairs = tf.argmax(
                repair_probs, axis=1, output_type=tf.int32)
            non_buggy_predictions = tf.equal(predicted_locations, 0)

            predicted_locations_one_hot = tf.one_hot(
                predicted_locations, sequence_length, dtype=tf.int32)
            predicted_repairs_one_hot = tf.one_hot(
                predicted_repairs, sequence_length, dtype=tf.int32)

            classification_accuracy = tf.metrics.accuracy(
                labels=non_buggy_mask,
                predictions=non_buggy_predictions,
                weights=is_real_example)
            non_buggy_weights = is_real_example * tf.cast(
                non_buggy_mask, tf.float32)
            true_positive_rate = tf.metrics.accuracy(
                labels=non_buggy_mask,
                predictions=non_buggy_predictions,
                weights=non_buggy_weights)

            location_hits = tf.reduce_sum(
                tf.multiply(predicted_locations_one_hot, error_location_mask),
                axis=1)
            # We can have more than one valid repair location, so
            # `target_mask` can have multiple ones in it. The sum below is 1
            # when the predicted repair location is one of the valid ones.
            repair_hits = tf.reduce_sum(
                tf.multiply(predicted_repairs_one_hot, target_mask), axis=1)
            joint_hits = location_hits * repair_hits

            # The three accuracies below share labels and weights: they are
            # restricted to real, buggy examples.
            buggy_labels = tf.cast(buggy_mask, tf.int32)
            buggy_weights = is_real_example * tf.cast(buggy_mask, tf.float32)
            localization_accuracy = tf.metrics.accuracy(
                labels=buggy_labels,
                predictions=location_hits,
                weights=buggy_weights)
            repair_accuracy = tf.metrics.accuracy(
                labels=buggy_labels,
                predictions=repair_hits,
                weights=buggy_weights)
            localization_repair_accuracy = tf.metrics.accuracy(
                labels=buggy_labels,
                predictions=joint_hits,
                weights=buggy_weights)

            mean_loss = tf.metrics.mean(
                values=per_example_loss, weights=is_real_example)

            return {
                "eval_accuracy_classification": classification_accuracy,
                "eval_true_positive_rate": true_positive_rate,
                "eval_accuracy_localization": localization_accuracy,
                "eval_accuracy_repair": repair_accuracy,
                "eval_accuracy_localization_repair":
                    localization_repair_accuracy,
                "eval_loss": mean_loss,
            }

        metric_inputs = [
            per_example_loss, probabilities, error_location_mask, target_mask,
            is_real_example
        ]
        output_spec = contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)
    else:
        output_spec = contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the requested estimator mode.

    `bert_config`, `num_labels`, `init_checkpoint`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `use_tpu` and
    `use_one_hot_embeddings` are read from the enclosing scope.

    Args:
        features: dict of feature tensors coming from `input_fn`; the keys
            "input_ids", "input_mask", "segment_ids" and "label_ids" are read.
        labels: unused; the labels are taken from `features["label_ids"]`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` carrying the train op (TRAIN),
        the eval metrics (EVAL) or the predicted ids (any other mode).
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # input_mask holds 1 for real tokens and 0 for padding, so summing its
    # sign along axis 1 yields one length per sequence in the batch.
    used = tf.sign(tf.abs(input_mask))
    sequence_lens = tf.reduce_sum(used, 1)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Only the loss and the predicted ids are consumed below.
    (loss, _, _, pred_y) = create_model(
        bert_config, is_training, input_ids, input_mask, sequence_lens,
        segment_ids, label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                # Restore the variables named in assignment_map from the
                # checkpoint when the scaffold is built.
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        # scaffold_fn is what seeds the model with the checkpoint weights.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, pred_y, sequence_lens):
            """Compute precision/recall/F1 over the non-padded positions."""
            # NOTE(review): the label indices below are hard-coded;
            # presumably they are the entity tags of the label map -- confirm.
            pos_indices = [2, 3, 4, 5, 6, 7]
            weight = tf.sequence_mask(sequence_lens, FLAGS.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_y, num_labels,
                                             pos_indices, weight)
            recall = tf_metrics.recall(label_ids, pred_y, num_labels,
                                       pos_indices, weight)
            f1 = tf_metrics.f1(label_ids, pred_y, num_labels, pos_indices,
                               weight)
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f1": f1,
            }

        # eval_metrics must be a (metric_fn, tensors) tuple. Every tensor the
        # metric function reads is passed explicitly instead of being captured
        # from the enclosing graph, so the function only sees host tensors.
        eval_metrics = (metric_fn, [label_ids, pred_y, sequence_lens])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # Prediction only returns the predicted label ids.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_y,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the requested estimator mode.

    `bert_config`, `num_labels`, `init_checkpoint`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `use_tpu` and
    `use_one_hot_embeddings` are read from the enclosing scope.

    Args:
        features: dict of feature tensors; the keys "input_ids", "mask",
            "segment_ids" and "label_ids" are read.
        labels: unused; the labels are taken from `features["label_ids"]`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` carrying the train op (TRAIN),
        a streaming confusion matrix (EVAL) or `predicts` (any other mode).
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    mask = features["mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings, FLAGS.use_lstm)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Must be a container: the logging loop below runs membership tests on it
    # even when no checkpoint is configured.
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        # The checkpoint is wired in exactly once: lazily through the scaffold
        # when running on TPU, directly otherwise.
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    logging.info("**** Trainable Variables ****")
    logging.info(
        "=======================================variables to fine tune============================================="
    )
    # Only the variables from index FLAGS.layer onwards are listed.
    tvars = tvars[FLAGS.layer:]
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                     init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu, FLAGS.layer)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, logits, num_labels, mask):
            """Return a streaming confusion matrix weighted by `mask`."""
            predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
            cm = metrics.streaming_confusion_matrix(
                label_ids, predictions, num_labels - 1, weights=mask)
            return {"confusion_matrix": cm}

        eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Reads "input_ids", "input_mask" and "segment_ids" from `features`, plus
    "labels" and "labels_mask" in every mode except PREDICT. Configuration is
    taken from the `self` captured from the enclosing scope.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for the given `mode`.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", key, features[key].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    labels = None
    labels_mask = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        labels = features["labels"]
        if self._config.use_t2t_decoder:
            # Account for the begin and end tokens used by Transformer.
            labels = labels + 2
        labels_mask = tf.cast(features["labels_mask"], tf.float32)

    model_outputs = self._create_model(mode, input_ids, input_mask,
                                       segment_ids, labels, labels_mask)
    total_loss, per_example_loss, predictions = model_outputs

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if self._init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, self._init_checkpoint))
        if not self._use_tpu:
            tf.train.init_from_checkpoint(self._init_checkpoint,
                                          assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(self._init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    # Only the header is logged; no per-variable listing follows it.
    tf.logging.info("**** Trainable Variables ****")

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, self._learning_rate, self._num_train_steps,
            self._num_warmup_steps, self._use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, labels, labels_mask, predictions):
            """Compute eval metrics."""
            # A position counts as correct when the prediction matches the
            # label or the position is masked out; reduce_all acts as a
            # logical AND, so a row scores 1.0 only if every position is
            # correct.
            token_match = tf.equal(labels, predictions)
            is_masked_out = tf.logical_not(tf.cast(labels_mask, tf.bool))
            row_correct = tf.reduce_all(
                tf.logical_or(token_match, is_masked_out), axis=1)
            accuracy = tf.cast(row_correct, tf.float32)
            return {
                # This is equal to the Exact score if the final realization
                # step doesn't introduce errors.
                "sentence_level_acc": tf.metrics.mean(accuracy),
                "eval_loss": tf.metrics.mean(per_example_loss),
            }

        metric_inputs = [per_example_loss, labels, labels_mask, predictions]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"pred": predictions},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Reads the per-choice features "input_ids<i>", "input_mask<i>" and
    "segment_ids<i>" for i in 0..4, stacks them along axis 1 and flattens
    them to rows of `seq_length` before feeding `modeling.BertModel`.
    Column 4 of `features["labels"]` is used as the label.

    Returns:
        A `contrib_tpu.TPUEstimatorSpec` for the given `mode`.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    num_choices = 4
    first_index = 0
    last_index = num_choices + 1
    suffixes = [str(i) for i in range(first_index, last_index)]

    ids_per_choice = [features["input_ids" + s] for s in suffixes]
    mask_per_choice = [features["input_mask" + s] for s in suffixes]
    segments_per_choice = [features["segment_ids" + s] for s in suffixes]

    label_ids = features["labels"][:, 4]

    seq_length = ids_per_choice[0].shape[-1]
    flat_shape = [-1, seq_length]
    input_ids = tf.reshape(tf.stack(ids_per_choice, axis=1), flat_shape)
    input_mask = tf.reshape(tf.stack(mask_per_choice, axis=1), flat_shape)
    segment_ids = tf.reshape(tf.stack(segments_per_choice, axis=1), flat_shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # Pick the head builder according to the preprocessing flag.
    build_head = (model_builder.create_model_bilin
                  if FLAGS.bilin_preproc else model_builder.create_model)
    (total_loss, per_example_loss, logits,
     probabilities) = build_head(model, label_ids, num_choices)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        from_ckpt = var.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy of the argmax prediction and the mean loss."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy":
                    tf.metrics.accuracy(
                        labels=label_ids, predictions=predictions),
                "eval_loss":
                    tf.metrics.mean(values=per_example_loss),
            }

        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [per_example_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)

    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the requested estimator mode.

    `bert_config`, `num_labels`, `init_checkpoint`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `use_tpu` and
    `use_one_hot_embeddings` are read from the enclosing scope.

    Args:
        features: dict of feature tensors; the keys "input_ids",
            "input_mask", "segment_ids" and "label_ids" are read.
        labels: unused; the labels are taken from `features["label_ids"]`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` carrying the train op (TRAIN),
        the eval metrics (EVAL) or the predicted ids (any other mode).

    Raises:
        AssertionError: in EVAL mode, if `eval_ids_list.txt` is missing from
            `FLAGS.output_dir` or contains no ids.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, logits, trans, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Defined up front so the logging loop below also works when no
    # checkpoint is configured.
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        # The checkpoint is wired in exactly once: lazily through the scaffold
        # when running on TPU, directly otherwise.
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        # scaffold_fn is what seeds the model with the checkpoint weights.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, pred_ids):
            """Compute precision/recall/F1 over the ids in eval_ids_list.txt."""
            list_path = os.path.join(FLAGS.output_dir, "eval_ids_list.txt")
            assert os.path.exists(list_path)
            # The file stores one label id per line as text; the metrics
            # below expect a list of ints.
            with open(list_path, 'r') as list_file:
                eval_list = [int(item.strip()) for item in list_file]
            assert 0 < len(eval_list)
            print("eval_list:", eval_list)
            # NOTE(review): sequence_mask is given only the scalar
            # max_seq_length, so the mask is all True and padded positions
            # are not excluded -- confirm this is intended.
            weight = tf.sequence_mask(FLAGS.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                             eval_list, weight)
            recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                       eval_list, weight)
            f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list,
                              weight)
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        # Every tensor the metric function reads is passed explicitly instead
        # of being captured from the enclosing graph.
        eval_metrics = (metric_fn, [label_ids, pred_ids])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for a start/end position model.

    `bert_config`, `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu` and `use_one_hot_embeddings` are read from
    the enclosing scope.

    Args:
        features: dict of feature tensors; "unique_ids", "input_ids",
            "input_mask" and "segment_ids" are always read, and
            "start_positions" / "end_positions" in TRAIN and EVAL.
        labels: unused.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for the given `mode`.

    Raises:
        ValueError: if `mode` is not TRAIN, EVAL or PREDICT.
    """
    tf.logging.info("*******Features*******")
    for name in sorted(features.keys()):
        tf.logging.info("name = %s, shape = %s" % (name, features[name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        seq_length = modeling.get_shape_list(input_ids)[1]

        def compute_loss(logits, positions):
            """Return (mean loss, per-example loss) of the position softmax."""
            one_hot_positions = tf.one_hot(
                positions, depth=seq_length, dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            per_example_loss = -tf.reduce_sum(
                log_probs * one_hot_positions, axis=[-1])
            # The batch loss is the mean of the per-example losses.
            loss = tf.reduce_mean(per_example_loss)
            return loss, per_example_loss

        start_positions = features["start_positions"]
        end_positions = features["end_positions"]

        start_loss, per_example_start_loss = compute_loss(
            start_logits, start_positions)
        end_loss, per_example_end_loss = compute_loss(
            end_logits, end_positions)

        total_loss = (start_loss + end_loss) / 2.0
        per_example_total_loss = (
            per_example_start_loss + per_example_end_loss) / 2.0

        if mode == tf.estimator.ModeKeys.TRAIN:
            # The optimizer is only built when training, so evaluation does
            # not depend on the training-step configuration.
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        else:

            def metric_fn(per_example_total_loss):
                """Return the mean of the flattened per-example losses."""
                per_example_total_loss = tf.reshape(per_example_total_loss,
                                                    [-1])
                eval_loss = tf.metrics.mean(values=per_example_total_loss)
                return dict(eval_loss=eval_loss)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                eval_metrics=(metric_fn, [per_example_total_loss]),
                loss=total_loss,
                scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "unique_ids": unique_ids,
            "start_logits": start_logits,
            "end_logits": end_logits,
        }
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
        raise ValueError(
            "Only TRAIN, EVAL and PREDICT modes are supported : %s" % (mode))
    return output_spec
def train(args):
    """Train the NER model with a hand-written session loop.

    The function prepares the output directory, converts the train and dev
    examples to TFRecord files, builds one graph whose BERT branch is chosen
    by an `is_training` placeholder, and then runs a fixed number of steps,
    writing summaries to `./log` and printing train/eval accuracy every ten
    steps.

    Args:
        args: parsed options. The attributes read here are `device_map`,
            `bert_config_file`, `max_seq_length`, `clean`, `do_train`,
            `do_eval`, `output_dir`, `ner`, `vocab_file`, `do_lower_case`,
            `data_dir`, `batch_size`, `num_train_epochs`, `init_checkpoint`,
            `learning_rate`, `dropout_rate`, `lstm_size`, `cell` and
            `num_layers`.

    Raises:
        ValueError: if `args.max_seq_length` exceeds the BERT position limit.
        AttributeError: if the data yields less than one training step.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_map

    # Maps a task name to the class that reads its data.
    processors = {"ner": NerProcessor}

    bert_config = modeling.BertConfig.from_json_file(args.bert_config_file)

    if args.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (args.max_seq_length, bert_config.max_position_embeddings))

    # Files of a previous run are only removed when re-training, never when
    # predicting.
    if args.clean and args.do_train:
        if os.path.exists(args.output_dir):

            def del_file(path):
                """Recursively delete the files below `path`."""
                ls = os.listdir(path)
                for i in ls:
                    c_path = os.path.join(path, i)
                    if os.path.isdir(c_path):
                        del_file(c_path)
                    else:
                        os.remove(c_path)

            try:
                del_file(args.output_dir)
            except Exception as e:
                print(e)
                print('pleace remove the files of output dir and data.conf')
                exit(-1)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    processor = processors[args.ner](args.output_dir)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)

    train_examples = None
    eval_examples = None
    num_train_steps = None
    # No warmup step count is computed in this function, so this stays None.
    num_warmup_steps = None

    if args.do_train and args.do_eval:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) * 1.0 / args.batch_size *
            args.num_train_epochs)
        if num_train_steps < 1:
            raise AttributeError('training data is so small...')

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", args.batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)

        eval_examples = processor.get_dev_examples(args.data_dir)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d", len(eval_examples))
        tf.logging.info(" Batch size = %d", args.batch_size)

    # The label list determines the predicted classes; one extra id is
    # reserved because the label indices below start at 1.
    label_list = processor.get_labels()
    num_labels = len(label_list) + 1
    init_checkpoint = args.init_checkpoint
    learning_rate = args.learning_rate

    with tf.name_scope('input'):
        input_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        input_mask = tf.placeholder(tf.int32, [None, args.max_seq_length])
        segment_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        label_ids = tf.placeholder(tf.int32, [None, args.max_seq_length])
        is_training = tf.placeholder(tf.bool)

    # The second positional argument of create_model is its is_training flag.
    def train_model():
        return create_model(bert_config,
                            True,
                            input_ids,
                            input_mask,
                            segment_ids,
                            label_ids,
                            num_labels,
                            False,
                            args.dropout_rate,
                            args.lstm_size,
                            args.cell,
                            args.num_layers,
                            reuse=False)

    def eval_model():
        return create_model(bert_config,
                            False,
                            input_ids,
                            input_mask,
                            segment_ids,
                            label_ids,
                            num_labels,
                            False,
                            args.dropout_rate,
                            args.lstm_size,
                            args.cell,
                            args.num_layers,
                            reuse=True)

    total_loss, logits, trans, pred_ids = tf.cond(
        tf.equal(is_training, tf.constant(True)),
        true_fn=train_model,
        false_fn=eval_model)

    accuracy, acc_op = tf.metrics.accuracy(
        labels=label_ids, predictions=pred_ids)

    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('accuracy', acc_op)

    # Map the trainable variables onto the pre-trained checkpoint.
    tvars = tf.trainable_variables()
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps, False)

    # 1. Serialize the training examples to a TFRecord file (only once).
    train_file = os.path.join(args.output_dir, "train.tf_record")
    if not os.path.exists(train_file):
        filed_based_convert_examples_to_features(
            train_examples, label_list, args.max_seq_length, tokenizer,
            train_file, args.output_dir)

    # 2. Read the records back and batch them.
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=args.max_seq_length,
        is_training=True,
        drop_remainder=True,
        batch_size=args.batch_size)

    eval_file = os.path.join(args.output_dir, "eval.tf_record")
    if not os.path.exists(eval_file):
        filed_based_convert_examples_to_features(
            eval_examples, label_list, args.max_seq_length, tokenizer,
            eval_file, args.output_dir)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=args.max_seq_length,
        is_training=False,
        drop_remainder=False,
        batch_size=args.batch_size)

    train_input = train_input_fn.make_one_shot_iterator()
    eval_input = eval_input_fn.make_one_shot_iterator()

    max_step = 1500
    merged = tf.summary.merge_all()
    meta_train_data = train_input.get_next()
    meta_eval_data = eval_input.get_next()

    config = tf.ConfigProto(
        log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    # A single, configured session is used. The writer records the default
    # graph and is closed even if a step fails.
    train_writer = tf.summary.FileWriter('./log', tf.get_default_graph())
    try:
        with tf.Session(config=config) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            # One eval batch is fetched up front and reused at every report.
            eval_data = sess.run(meta_eval_data)

            print(label_list)
            # Label indices start at 1.
            label_map = {
                label: i for (i, label) in enumerate(label_list, 1)
            }
            print(label_map)

            for i in range(max_step):
                # Materialize the next batch as numpy arrays for feeding.
                train_data = sess.run(meta_train_data)
                train_feed = {
                    input_ids: train_data['input_ids'],
                    input_mask: train_data['input_mask'],
                    segment_ids: train_data['segment_ids'],
                    label_ids: train_data['label_ids'],
                    # NOTE(review): is_training is fed False even for the
                    # training step. The original author's note says mixing
                    # True/False appeared to trigger a shared-variable bug,
                    # so dropout is left off for now.
                    is_training: False
                }
                sess.run(train_op, feed_dict=train_feed)

                if i % 10 == 1:
                    train_summary, acco, prediction = sess.run(
                        [merged, acc_op, pred_ids], feed_dict=train_feed)
                    acco_evl, prediction_eval = sess.run(
                        [acc_op, pred_ids],
                        feed_dict={
                            input_ids: eval_data['input_ids'],
                            input_mask: eval_data['input_mask'],
                            segment_ids: eval_data['segment_ids'],
                            label_ids: eval_data['label_ids'],
                            is_training: False
                        })
                    train_writer.add_summary(train_summary, i)
                    print('saving summary at %s, accuracy %s, accuracy_eval %s'
                          % (i, acco, acco_evl))
                    mymetrics.compute(prediction_eval, eval_data['label_ids'],
                                      label_list)
    finally:
        train_writer.close()
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Reads "input_ids", "input_mask", "segment_ids", "mention_id", "uid",
    "start_token" and "end_token" from `features` and builds the model via
    `create_zeshel_model`. Outside TRAIN and EVAL the spec carries the "uid"
    and "mention_rep" predictions together with the loss.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for the given `mode`.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    mention_ids = features["mention_id"]
    uids = features["uid"]
    start_token = features["start_token"]
    end_token = features["end_token"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model_outputs = create_zeshel_model(
        bert_config, output_dim, num_cands, margin, is_training, input_ids,
        input_mask, segment_ids, mention_ids, start_token, end_token,
        use_one_hot_embeddings)
    total_loss, per_example_loss, mention_rep = model_outputs

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            bert.modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        from_ckpt = var.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Evaluation reports the loss only; no extra metrics are attached.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=total_loss, scaffold_fn=scaffold_fn)

    predictions = {"uid": uids, "mention_rep": mention_rep}
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build a plain `EstimatorSpec` for the requested estimator mode.

    Reads "input_ids", "input_mask", "segment_ids" and "label_ids" from
    `features` and builds the model via `BertSim.create_model`. TRAIN
    returns the loss and train op, EVAL the loss with accuracy, AUC and mean
    loss metrics, and any other mode the `probabilities` tensor as
    predictions.
    """
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model_outputs = BertSim.create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)
    total_loss, per_example_loss, logits, probabilities = model_outputs

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        from_ckpt = var.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        return EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy, AUC and mean loss for the argmax prediction."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            accuracy = tf.metrics.accuracy(label_ids, predictions)
            auc = tf.metrics.auc(label_ids, predictions)
            loss = tf.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_auc": auc,
                "eval_loss": loss,
            }

        # The metric ops are built right here and handed over as a dict.
        metric_ops = metric_fn(per_example_loss, label_ids, logits)
        return EstimatorSpec(
            mode=mode, loss=total_loss, eval_metric_ops=metric_ops)

    return EstimatorSpec(mode=mode, predictions=probabilities)
def compile(self):
    """Define operations for loss, measures and optimization, then create
    the session and initialize variables.

    Sets `self.loss`, `self.accuracy`, `self.precision`, `self.recall`,
    `self.f1`, `self.global_step`, `self.learning_rate`, `self.train_op`
    and `self.sess`. Reads its settings from `self.config`.

    The optimizer is chosen as follows:
      * 'bert' in `config.emb_class` and `config.use_bert_optimization`:
        `optimization.create_optimizer` from the `bert` package.
      * 'bert' in `config.emb_class` otherwise: `AdamWeightDecayOptimizer`
        with the decay + warmup schedule and global-norm gradient clipping.
      * otherwise: `tf.train.AdamOptimizer` with the same schedule and
        clipping.
    """
    config = self.config

    def scheduled_learning_rate():
        """Return the exponentially decayed rate with optional linear warmup."""
        decayed_rate = tf.train.exponential_decay(
            config.starter_learning_rate,
            self.global_step,
            config.decay_steps,
            config.decay_rate,
            staircase=True)
        # Without warmup steps the warmup fraction would be a division by
        # zero (inf/NaN), and `0 * inf` would turn the whole learning rate
        # into NaN, so the warmup blend is skipped entirely.
        if not config.num_warmup_steps:
            return decayed_rate
        # linear warmup, if global_step < num_warmup_steps, then
        # learning rate = (global_step / num_warmup_steps) * starter_learning_rate
        global_steps_int = tf.cast(self.global_step, tf.int32)
        warmup_steps_int = tf.constant(config.num_warmup_steps, dtype=tf.int32)
        global_steps_float = tf.cast(global_steps_int, tf.float32)
        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
        warmup_percent_done = global_steps_float / warmup_steps_float
        warmup_learning_rate = config.starter_learning_rate * warmup_percent_done
        is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
        return ((1.0 - is_warmup) * decayed_rate
                + is_warmup * warmup_learning_rate)

    def clipped_gradients():
        """Return (grads, tvars) with gradients clipped by global norm."""
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, tvars), config.clip_norm)
        return grads, tvars

    # define operations for loss, measures, optimization
    self.loss = self.__compute_loss()
    self.accuracy, self.precision, self.recall, self.f1 = (
        self.__compute_measures())

    with tf.variable_scope('optimization'):
        self.global_step = tf.train.get_or_create_global_step()
        if 'bert' in config.emb_class:
            from bert import optimization
            if config.use_bert_optimization:
                self.learning_rate = tf.constant(
                    value=config.starter_learning_rate,
                    shape=[],
                    dtype=tf.float32)
                self.train_op = optimization.create_optimizer(
                    self.loss, config.starter_learning_rate,
                    config.num_train_steps, config.num_warmup_steps, False)
            else:
                self.learning_rate = scheduled_learning_rate()
                # Adam optimizer with correct L2 weight decay
                optimizer = optimization.AdamWeightDecayOptimizer(
                    learning_rate=self.learning_rate,
                    weight_decay_rate=0.01,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-6,
                    exclude_from_weight_decay=[
                        "LayerNorm", "layer_norm", "bias"
                    ])
                grads, tvars = clipped_gradients()
                train_op = optimizer.apply_gradients(
                    zip(grads, tvars), global_step=self.global_step)
                # The global step is advanced explicitly and grouped with
                # the optimizer update.
                new_global_step = self.global_step + 1
                self.train_op = tf.group(
                    train_op, [self.global_step.assign(new_global_step)])
        else:
            self.learning_rate = scheduled_learning_rate()
            # Adam optimizer
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            grads, tvars = clipped_gradients()
            self.train_op = optimizer.apply_gradients(
                zip(grads, tvars), global_step=self.global_step)
            # An alternative considered here was Adam with a cyclical
            # learning rate: https://github.com/mhmoodlan/cyclic-learning-rate

    # create session, initialize variables. this should be placed at the
    # end of graph definitions.
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  inter_op_parallelism_threads=0,
                                  intra_op_parallelism_threads=0)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    feed_dict = {self.wrd_embeddings_init: config.embvec.wrd_embeddings}
    # feed large embedding data
    sess.run(tf.global_variables_initializer(), feed_dict=feed_dict)
    sess.run(tf.local_variables_initializer())  # for tf_metrics
    self.sess = sess
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Reshapes the id/mask/segment features to `[-1, FLAGS.max_seq_length]`,
    builds a `modeling.BertModel` and feeds it to
    `model_builder.create_model_bilin`. `labels` and `params` are not used;
    the label tensor is read from `features["label"]`. Names such as
    `bert_config`, `num_choices`, `init_checkpoint`, `use_tpu`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps` and
    `use_one_hot_embeddings` are expected from the enclosing scope.

    Returns:
        A `TPUEstimatorSpec` for TRAIN (loss + train op), EVAL (loss +
        accuracy / mean-loss metrics) or, for any other mode, predictions
        under the key "probabilities".
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s",
                        feature_name, features[feature_name].shape)

    flat_shape = [-1, FLAGS.max_seq_length]
    input_ids = tf.reshape(features["input_ids"], flat_shape)
    input_mask = tf.reshape(features["input_mask"], flat_shape)
    segment_ids = tf.reshape(features["segment_ids"], flat_shape)
    label_ids = features["label"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    cpc_loss, _, logits, probabilities = model_builder.create_model_bilin(
        model, label_ids, num_choices)
    total_loss = cpc_loss

    trainable_vars = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, restored_names = (
            modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the checkpoint restore is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s",
                        variable.name, variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(cpc_loss, label_ids, logits):
            """Return accuracy and mean loss metrics for evaluation."""
            predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=label_ids, predictions=predicted_ids),
                "eval_cpc_loss": tf.metrics.mean(values=cpc_loss),
            }

        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [cpc_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)

    # Any other mode only exposes the probabilities.
    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the NER model.

    Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
    `features`, builds the model through `create_model` and returns a spec
    for the requested mode. `labels` and `params` are not used. Names such as
    `bert_config`, `num_labels`, `init_checkpoint`, `use_tpu`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps` and
    `use_one_hot_embeddings` are expected from the enclosing scope.

    Args:
        features: Mapping from feature name to tensor.
        labels: Unused.
        mode: One of the `tf.estimator.ModeKeys` values.
        params: Unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN (loss + train op),
        EVAL (loss + precision / recall / F metrics) or, for any other mode,
        `pred_ids` as predictions.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    print('shape of input_ids', input_ids.shape)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model from the arguments: input_ids is the index
    # representation of the input samples, label_ids that of the labels.
    (total_loss, logits, trans, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    # Bound up front so the logging loop below also works when no
    # checkpoint is given (it used to raise NameError in that case).
    initialized_variable_names = {}
    scaffold_fn = None
    # Load the BERT model.
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = (
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
        # Register the checkpoint initialization exactly once: through the
        # scaffold on TPU, directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    # Log the trainable variables and which of them come from the checkpoint.
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        # The scaffold hook is what uses the BERT parameters as the initial
        # values of this model.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Modified for NER.
        def metric_fn(label_ids, pred_ids):
            """Return precision, recall and F metrics for the decoded ids."""
            # `pred_ids` arrives through the eval_metrics tensor list
            # instead of being captured from the enclosing graph.
            # NOTE(review): the weight is a mask built from the scalar
            # FLAGS.max_seq_length, not from input_mask -- confirm intended.
            weight = tf.sequence_mask(FLAGS.max_seq_length)
            # NOTE(review): [2, 3, 4, 5, 6, 7] is presumably the set of
            # label indices counted as positive -- verify against the
            # label map.
            precision = tf_metrics.precision(
                label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
            recall = tf_metrics.recall(
                label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
            f = tf_metrics.f1(
                label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, pred_ids])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn)
    return output_spec