def train_and_eval(args, processor, tokenizer, bert_config, sess_config, label_list):
    """Train the BERT+PCNN classifier and periodically evaluate it on the dev set.

    The train/dev examples are converted to TFRecord files under
    ``args.output_dir`` (files that already exist are reused), the graph is
    built on placeholders, and a manual feed-dict training loop is run.
    Every ``args.save_summary_steps`` steps the training loss is written to
    the summary log; every ``args.save_checkpoints_steps`` steps a fixed
    number of dev batches is scored, loss / macro-F1 / accuracy are logged,
    and a checkpoint is saved whenever accuracy improves.  Training returns
    early after too many consecutive evaluations without improvement.

    Args:
        args: Hyper-parameter namespace. Reads ``data_dir``, ``output_dir``,
            ``batch_size``, ``num_train_epochs``, ``warmup_proportion``,
            ``max_seq_length``, ``learning_rate``, ``init_checkpoint``,
            ``save_summary_steps`` and ``save_checkpoints_steps``.
        processor: Object providing ``get_train_examples(data_dir)`` and
            ``get_dev_examples(data_dir)``.
        tokenizer: Passed through to the TFRecord feature converter.
        bert_config: Passed through to ``create_model_PCNN``.
        sess_config: Passed as ``config`` to ``tf.Session``.
        label_list: Label vocabulary; its length is used as the class count.

    Raises:
        AttributeError: If the computed number of training steps is below 1.
    """
    # Scoring the whole dev set on every round was considered too slow, so
    # only a fixed number of batches is evaluated each time.
    eval_batches = 1000
    # Consecutive non-improving evaluations tolerated before stopping.
    max_patience = 100

    train_examples = processor.get_train_examples(args.data_dir)
    eval_examples = processor.get_dev_examples(args.data_dir)
    num_train_steps = int(
        len(train_examples) * 1.0 / args.batch_size * args.num_train_epochs)
    if num_train_steps < 1:
        raise AttributeError('training data is so small...')
    num_warmup_steps = int(num_train_steps * args.warmup_proportion)

    tf.logging.info("***** Running training *****")
    tf.logging.info(" Num examples = %d", len(train_examples))
    tf.logging.info(" Batch size = %d", args.batch_size)
    tf.logging.info(" Num steps = %d", num_train_steps)
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info(" Num examples = %d", len(eval_examples))
    tf.logging.info(" Batch size = %d", args.batch_size)

    # Create every output directory up front so that writing the TFRecord
    # files below cannot fail on a missing ``output_dir``.
    log_dir = os.path.join(args.output_dir, 'log')
    save_dir = os.path.join(args.output_dir, 'model')
    for directory in (args.output_dir, log_dir, save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Write the TFRecord files (an existing file is reused as a cache).
    train_file = os.path.join(args.output_dir, "train.tf_record")
    if not os.path.exists(train_file):
        filed_based_convert_examples_to_features(
            train_examples, label_list, args.max_seq_length, tokenizer,
            train_file)
    eval_file = os.path.join(args.output_dir, "eval.tf_record")
    if not os.path.exists(eval_file):
        filed_based_convert_examples_to_features(
            eval_examples, label_list, args.max_seq_length, tokenizer,
            eval_file)

    # Build the input pipelines.
    train_data = file_based_dataset(input_file=train_file,
                                    batch_size=args.batch_size,
                                    seq_length=args.max_seq_length,
                                    is_training=True,
                                    drop_remainder=False)
    eval_data = file_based_dataset(input_file=eval_file,
                                   batch_size=args.batch_size,
                                   seq_length=args.max_seq_length,
                                   is_training=False,
                                   drop_remainder=False)
    train_iter = train_data.make_one_shot_iterator().get_next()
    # One re-initializable iterator is created once and rewound before each
    # evaluation round; building a fresh iterator per round would keep
    # adding ops to the graph.
    eval_iterator = eval_data.repeat().make_initializable_iterator()
    eval_iter = eval_iterator.get_next()

    with tf.Session(config=sess_config) as sess:
        # Model inputs.
        input_ids = tf.placeholder(
            shape=[None, args.max_seq_length], dtype=tf.int32,
            name='input_ids')
        input_mask = tf.placeholder(
            shape=[None, args.max_seq_length], dtype=tf.int32,
            name='input_mask')
        segment_ids = tf.placeholder(
            shape=[None, args.max_seq_length], dtype=tf.int32,
            name='segment_ids')
        label_ids = tf.placeholder(
            shape=[None], dtype=tf.int32, name='label_ids')
        position_ids = tf.placeholder(
            shape=[None, args.max_seq_length, 4], dtype=tf.int32,
            name='position_ids')
        pcnn_masks = tf.placeholder(
            shape=[None, args.max_seq_length], dtype=tf.int32,
            name='pcnn_masks')
        # Graph-level train/eval switch, toggled via the assign ops below.
        is_training = tf.get_variable(
            "is_training", shape=[], dtype=tf.bool, trainable=False)

        total_loss, per_example_loss, logits, probabilities = create_model_PCNN(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, len(label_list), position_ids, pcnn_masks)
        pred_ids = tf.argmax(probabilities, axis=-1, output_type=tf.int32,
                             name="pred_ids")

        # Optimizer.
        train_op = optimization.create_optimizer(
            total_loss, args.learning_rate, num_train_steps,
            num_warmup_steps, False)
        global_step = tf.train.get_global_step()

        # Mode-switch ops are built once and reused on every toggle.
        enter_train_mode = tf.assign(
            is_training, tf.constant(True, dtype=tf.bool))
        enter_eval_mode = tf.assign(
            is_training, tf.constant(False, dtype=tf.bool))

        # Load the pretrained BERT weights.  init_from_checkpoint only
        # overrides variable initializers, so it has to be called BEFORE the
        # global initializer is run or the checkpoint values are never used.
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if args.init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(
                    tvars, args.init_checkpoint)
            tf.train.init_from_checkpoint(args.init_checkpoint, assignment_map)
        # Log which variables are initialized from the checkpoint.
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        sess.run(tf.global_variables_initializer())

        placeholders = {
            'input_ids': input_ids,
            'input_mask': input_mask,
            'segment_ids': segment_ids,
            'label_ids': label_ids,
            'position_ids': position_ids,
            'pcnn_masks': pcnn_masks,
        }

        def make_feed(batch):
            """Map a fetched dataset batch onto the model placeholders."""
            return {tensor: batch[key] for key, tensor in placeholders.items()}

        def scalar_summary(tag, value):
            """Wrap a single Python scalar in a ``tf.Summary`` proto."""
            return tf.Summary(
                value=[tf.Summary.Value(tag=tag, simple_value=value)])

        writer = tf.summary.FileWriter(log_dir, sess.graph)
        saver = tf.train.Saver()

        best_eval_acc = 0.0
        patience = 0

        try:
            sess.run(enter_train_mode)
            for go in range(1, num_train_steps + 1):
                train_batch = sess.run(train_iter)
                loss, _ = sess.run([total_loss, train_op],
                                   feed_dict=make_feed(train_batch))

                if go % args.save_summary_steps == 0:
                    # Training log.
                    writer.add_summary(
                        scalar_summary("loss/train_loss",
                                       loss / args.batch_size),
                        sess.run(global_step))
                    writer.flush()

                if go % args.save_checkpoints_steps == 0:
                    # Dev-set evaluation.
                    sess.run(enter_eval_mode)
                    # Rewind so every round scores the same leading batches.
                    sess.run(eval_iterator.initializer)
                    eval_loss_total = 0.0
                    pred_chunks = []
                    truth_chunks = []
                    for _ in range(eval_batches):
                        eval_batch = sess.run(eval_iter)
                        eval_loss, eval_preds, eval_truth = sess.run(
                            [total_loss, pred_ids, label_ids],
                            feed_dict=make_feed(eval_batch))
                        eval_loss_total += eval_loss
                        pred_chunks.append(eval_preds)
                        truth_chunks.append(eval_truth)

                    eval_preds_total = np.concatenate(pred_chunks)
                    eval_truth_total = np.concatenate(truth_chunks)
                    eval_f1 = metrics.f1_score(
                        eval_truth_total, eval_preds_total, average='macro')
                    eval_acc = metrics.accuracy_score(
                        eval_truth_total, eval_preds_total)
                    eval_loss_aver = eval_loss_total / eval_batches

                    # Evaluation log.
                    step = sess.run(global_step)
                    writer.add_summary(
                        scalar_summary("loss/eval_loss", eval_loss_aver), step)
                    writer.add_summary(
                        scalar_summary("eval/f1", eval_f1), step)
                    writer.add_summary(
                        scalar_summary("eval/acc", eval_acc), step)
                    writer.flush()

                    # Early stopping and checkpointing.
                    if eval_acc > best_eval_acc:
                        patience = 0
                        best_eval_acc = eval_acc
                        saver.save(sess, os.path.join(
                            save_dir, "model_{}_acc_{:.4f}.ckpt".format(
                                step, best_eval_acc)))
                    else:
                        patience += 1
                        if patience >= max_patience:
                            print("early stoping!")
                            return

                    # Switch back to training mode; otherwise every step
                    # after the first evaluation would run in eval mode.
                    sess.run(enter_train_mode)
        finally:
            # Flush and release the event file on every exit path.
            writer.close()
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the classifier with `create_model`, optionally wires the variable
    initializers to `init_checkpoint`, logs the trainable variables, and
    returns the `TPUEstimatorSpec` matching `mode` (train / eval / predict).
    """
    tf.compat.v1.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.compat.v1.logging.info(
            " name = %s, shape = %s" % (feature_name,
                                        features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Per-example weights: taken from the features when supplied, otherwise
    # every example gets weight 1.0.
    is_real_example = (
        tf.cast(features["is_real_example"], dtype=tf.float32)
        if "is_real_example" in features
        else tf.ones(tf.shape(label_ids), dtype=tf.float32))

    train_mode = tf.estimator.ModeKeys.TRAIN
    eval_mode = tf.estimator.ModeKeys.EVAL
    is_training = (mode == train_mode)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    trainable_vars = tf.compat.v1.trainable_variables()
    from_checkpoint = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, from_checkpoint) = \
            modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
        if not use_tpu:
            tf.compat.v1.train.init_from_checkpoint(
                init_checkpoint, assignment_map)
        else:
            # On TPU the checkpoint wiring is deferred to the scaffold
            # function handed to the estimator spec.
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.compat.v1.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        suffix = ", *INIT_FROM_CKPT*" if variable.name in from_checkpoint else ""
        tf.compat.v1.logging.info(" name = %s, shape = %s%s", variable.name,
                                  variable.shape, suffix)

    if mode == train_mode:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == eval_mode:

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            """Weighted accuracy and mean loss over the evaluated examples."""
            predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.compat.v1.metrics.accuracy(
                    labels=label_ids, predictions=predicted,
                    weights=is_real_example),
                "eval_loss": tf.compat.v1.metrics.mean(
                    values=per_example_loss, weights=is_real_example),
            }

        metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    # Any other mode is treated as prediction.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """The `model_fn` for a plain `tf.estimator.Estimator`.

    Builds the classifier with `create_model`, optionally wires the variable
    initializers to `init_checkpoint`, and returns the `EstimatorSpec` that
    matches `mode` (train / eval / predict).
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(
            " name = %s, shape = %s" % (feature_name,
                                        features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)

    train_mode = tf.estimator.ModeKeys.TRAIN
    eval_mode = tf.estimator.ModeKeys.EVAL
    is_training = (mode == train_mode)

    # Build the model: input_ids are the example token ids, label_ids the
    # label indices.
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels)

    # Initialize from the pretrained BERT checkpoint when one is configured.
    trainable_vars = tf.trainable_variables()
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == train_mode:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        # Periodically log the loss and the global step while training.
        logged_tensors = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            logged_tensors, every_n_iter=args.save_summary_steps)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])

    if mode == eval_mode:
        predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
        metric_ops = {
            "eval_accuracy": tf.metrics.accuracy(labels=label_ids,
                                                 predictions=predicted),
            "eval_loss": tf.metrics.mean(values=per_example_loss),
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=metric_ops)

    # Any other mode is treated as prediction.
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities})
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for the Estimator built on the TF-Hub BERT module.

    Returns an `EstimatorSpec` for the requested `mode`:

    * PREDICT: the two values returned by `create_model` are exposed under
      the keys ``'probabilities'`` and ``'labels'``.
    * TRAIN: the loss plus the train op from `optimization.create_optimizer`.
    * EVAL: the loss plus accuracy / AUC / precision / recall and the raw
      confusion counts, all computed from the predicted labels.

    The optimizer is built only in TRAIN mode and the metric ops only in
    EVAL mode, so neither graph carries ops that it never runs.
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    mode_keys = tf.compat.v1.estimator.ModeKeys
    is_predicting = (mode == mode_keys.PREDICT)

    if is_predicting:
        (predicted_labels, log_probs) = create_model(
            is_predicting, input_ids, input_mask, segment_ids, label_ids,
            num_labels, bert_model_hub)
        # NOTE(review): the 'probabilities' entry carries the value that
        # create_model returns as `log_probs` -- confirm what consumers of
        # this key expect.
        predictions = {
            'probabilities': log_probs,
            'labels': predicted_labels
        }
        return tf.compat.v1.estimator.EstimatorSpec(
            mode, predictions=predictions)

    # TRAIN and EVAL share the same forward graph.
    (loss, predicted_labels, _) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids,
        num_labels, bert_model_hub)

    if mode == mode_keys.TRAIN:
        train_op = optimization.create_optimizer(
            loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu=False)
        return tf.compat.v1.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    # EVAL: every metric compares the predicted labels with the gold labels.
    streaming = tf.compat.v1.metrics
    eval_metrics = {
        "eval_accuracy": streaming.accuracy(label_ids, predicted_labels),
        "auc": streaming.auc(label_ids, predicted_labels),
        "precision": streaming.precision(label_ids, predicted_labels),
        "recall": streaming.recall(label_ids, predicted_labels),
        "true_positives": streaming.true_positives(
            label_ids, predicted_labels),
        "true_negatives": streaming.true_negatives(
            label_ids, predicted_labels),
        "false_positives": streaming.false_positives(
            label_ids, predicted_labels),
        "false_negatives": streaming.false_negatives(
            label_ids, predicted_labels),
    }
    return tf.compat.v1.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metrics)