"""Evaluate a trained BERT NER model (data-distribution KL variant) on the test set.

Loads the saved final weights, plots latent-space images, and computes the
full evaluation metrics via ModelEvaluator.
"""
import os

import ner
from ner.models.bert_ner import BERT_NER
from ner.model_evaluator import ModelEvaluator
from ner.preprocessor import Preprocessor
from ner.utils import load_config


def main():
    """Run the end-to-end evaluation: config → data → weights → metrics."""
    config = load_config(
        model_config_name='bert_ner_data_dist_kl_config.yaml',
        master_config_name='config.yaml',
    )

    preprocessor = Preprocessor(**config)
    # unlabelled_ds is produced by the preprocessor but not needed for evaluation.
    labelled_ds, unlabelled_ds, test_ds = preprocessor.create_tf_dataset(**config)

    model = BERT_NER(**config)
    model.load_weights(os.path.join(ner.MODEL_DIR, 'BERT_NER_data_dist_kl_final'))

    evaluator = ModelEvaluator(test_ds, results_name='BERT_data_dist_kl')
    evaluator.plot_all_latent_images(model, **config)
    y_true_numpy_test_set, y_pred_numpy_test_set = evaluator.get_test_ds_labels(model)
    evaluator.get_all_results(y_true_numpy_test_set, y_pred_numpy_test_set)


if __name__ == '__main__':
    main()
def train():
    """Train an NER model end-to-end: data prep, config, session, epochs, eval.

    Reads all hyperparameters and file paths from the module-level ``FLAGS``.
    Saves a checkpoint whenever the dev-set evaluation reports a new best
    score, and then evaluates that checkpoint on the test set.
    """
    # Load the raw sentence datasets.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Create (or reload) the tag<->id mappings; cached in FLAGS.map_file.
    if not os.path.isfile(FLAGS.map_file):
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            tag_to_id, id_to_tag = pickle.load(f)

    # Convert sentences into model-ready feature datasets and batch them.
    train_data = prepare_dataset(train_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower)
    test_data = prepare_dataset(test_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower)
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, FLAGS.batch_size)
    test_manager = BatchManager(test_data, FLAGS.batch_size)

    # Create model/log directories if they do not already exist.
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        # Reuse an existing saved configuration.
        config = load_config(FLAGS.config_file)
    else:
        # Build the configuration from FLAGS and persist it.
        config = config_model(tag_to_id)
        save_config(config, FLAGS.config_file)
    # NOTE: the original called make_path(FLAGS) a second time here; the call
    # is redundant (directories were already created above) and was removed.

    # Set up logging.
    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # Let TensorFlow grow GPU memory on demand instead of grabbing it all.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    steps_per_epoch = train_manager.len_data  # number of batches per epoch
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, config, logger)
        logger.info("start training")
        loss = []
        for i in range(FLAGS.train_epoch):
            for batch in train_manager.iter_batch(shuffle=True):
                # One optimization step; collect the batch loss for logging.
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    # Periodic progress logging with the mean loss so far.
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{},step:{}/{},loss:{:>0.4f}".format(
                        iteration, step % steps_per_epoch,
                        steps_per_epoch, np.mean(loss)))
                    loss = []

            # Evaluate on dev; save and test only when a new best is reached.
            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger, global_steps=step)
                evaluate(sess, model, "test", test_manager, id_to_tag, logger)
"""Train a BERT NER model with the confidence-KL semi-supervised objective."""
from ner.preprocessor import Preprocessor
from ner.trainers.bert_ner_trainer_confidence_kl import BERT_NER_TraininerConfidenceKL
from ner.utils import load_config


def main():
    """Load config and datasets, then run the confidence-KL trainer."""
    config = load_config(
        model_config_name='bert_ner_confidence_kl_config.yaml',
        master_config_name='config.yaml',
    )

    preprocessor = Preprocessor(**config)
    labelled_ds, unlabelled_ds, test_ds = preprocessor.create_tf_dataset(**config)

    # NOTE(review): "Traininer" spelling matches the project class name as
    # imported; fixing it here would break the import.
    trainer = BERT_NER_TraininerConfidenceKL(**config)
    model = trainer.train(labelled_ds=labelled_ds,
                          test_ds=test_ds,
                          unlabelled_ds=unlabelled_ds,
                          **config)
    return model


if __name__ == '__main__':
    main()