# Example 1
import os
import ner
from ner.preprocessor import Preprocessor
from ner.utils import load_config
from ner.models.bert_ner import BERT_NER
from ner.model_evaluator import ModelEvaluator

# Evaluate a trained BERT NER model (data-distribution KL variant) on the test set.
config = load_config(model_config_name='bert_ner_data_dist_kl_config.yaml',
                     master_config_name='config.yaml')

preprocessor = Preprocessor(**config)
labelled_ds, unlabelled_ds, test_ds = preprocessor.create_tf_dataset(**config)

# Build a fresh model and restore the final trained weights into it.
model = BERT_NER(**config)
model.load_weights(os.path.join(ner.MODEL_DIR, 'BERT_NER_data_dist_kl_final'))

evaluator = ModelEvaluator(test_ds, results_name='BERT_data_dist_kl')
evaluator.plot_all_latent_images(model, **config)

# Collect ground-truth and predicted labels over the test set, then report metrics.
y_true_numpy_test_set, y_pred_numpy_test_set = evaluator.get_test_ds_labels(
    model)
evaluator.get_all_results(y_true_numpy_test_set, y_pred_numpy_test_set)
# Example 2
def train():
    """Train the NER model and evaluate it on the dev/test sets each epoch.

    Reads every path and hyper-parameter from the global ``FLAGS`` object:
    loads the raw sentence datasets, builds (or restores from
    ``FLAGS.map_file``) the tag<->id mappings, prepares batched data,
    creates/loads the model from ``FLAGS.ckpt_path``, then runs
    ``FLAGS.train_epoch`` epochs, saving a checkpoint whenever the dev-set
    evaluation reports an improvement.
    """
    # Load the raw sentence datasets.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Create the tag <-> id mapping file, or restore it if it already exists.
    if not os.path.isfile(FLAGS.map_file):
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            tag_to_id, id_to_tag = pickle.load(f)

    # Convert the sentences into model-ready feature/label sequences.
    train_data = prepare_dataset(train_sentences, FLAGS.max_seq_len, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, FLAGS.max_seq_len, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, FLAGS.max_seq_len, tag_to_id,
                                FLAGS.lower)

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, FLAGS.batch_size)
    test_manager = BatchManager(test_data, FLAGS.batch_size)

    # Create the model/log directories if they do not exist yet.
    # (The original called make_path(FLAGS) a second time after the config
    # block; one call is sufficient, so the duplicate was removed.)
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)  # reuse the saved config file
    else:
        config = config_model(tag_to_id)  # derive the config from FLAGS
        save_config(config, FLAGS.config_file)

    # Set up logging.
    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # Let TensorFlow grow GPU memory on demand instead of grabbing it all.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    steps_per_epoch = train_manager.len_data  # number of batches per epoch
    with tf.Session(config=tf_config) as sess:
        # Create a new model or restore one from FLAGS.ckpt_path.
        model = create_model(sess, Model, FLAGS.ckpt_path, config, logger)

        logger.info("start training")
        loss = []
        # Guard: if the training set yields no batches, `step` would otherwise
        # be unbound when save_model(..., global_steps=step) runs below.
        step = 0
        for i in range(FLAGS.train_epoch):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)  # one training step

                loss.append(batch_loss)  # accumulate losses for periodic reporting
                if step % FLAGS.steps_check == 0:  # periodic progress log
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{},step:{}/{},loss:{:>0.4f}".format(
                        iteration, step % steps_per_epoch, steps_per_epoch,
                        np.mean(loss)))
                    loss = []  # reset so the next report averages fresh batches
            # Evaluate on dev; checkpoint only when the score improves.
            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess,
                           model,
                           FLAGS.ckpt_path,
                           logger,
                           global_steps=step)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
# Example 3
from ner.trainers.bert_ner_trainer_confidence_kl import BERT_NER_TraininerConfidenceKL
from ner.preprocessor import Preprocessor
from ner.utils import load_config

# Run confidence-KL semi-supervised training for the BERT NER model.
config = load_config(model_config_name='bert_ner_confidence_kl_config.yaml',
                     master_config_name='config.yaml')

preprocessor = Preprocessor(**config)
labelled_ds, unlabelled_ds, test_ds = preprocessor.create_tf_dataset(**config)

# The trainer owns the training loop and returns the fitted model.
trainer = BERT_NER_TraininerConfidenceKL(**config)
model = trainer.train(labelled_ds=labelled_ds,
                      unlabelled_ds=unlabelled_ds,
                      test_ds=test_ds,
                      **config)