예제 #1
0
def mymodel_test(logger, test_dataloader, the_time=my_time):
    """Evaluate the saved classifier on ``test_dataloader`` and log accuracy.

    Rebuilds the ELECTRA embedding and ``EmtClassifyModel`` from
    ``args.mymodel_config_dir``, restores both checkpoints whose file names
    are prefixed with ``the_time``, and averages the per-batch accuracy
    returned by ``model.test``.

    Args:
        logger: Logger used to report the accuracy or the empty-loader error.
        test_dataloader: Iterable whose batches unpack into
            ``(input_ids, labels)`` tensors.
        the_time: File-name prefix of the checkpoints to load.

    Returns:
        A list containing the mean batch accuracy, or an empty list when the
        dataloader yielded no batches.
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = EmtClassifyModel(config=config, args=args)

    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    embedding.load_state_dict(
        torch.load(os.path.join(embedding_dir, the_time + 'pytorch_model.bin')))
    # Restore the classifier through the state-dict API, the same way
    # mymodel_pred loads this checkpoint file into the same model class.
    # NOTE(review): assumes EmtClassifyModel does not depend on a custom
    # ``load`` helper that does more than load the state dict - confirm.
    model.load_state_dict(
        torch.load(os.path.join(args.mymodel_save_dir,
                                the_time + "mymodel.bin")))

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    acc_records = []
    eval_accuracy = 0
    nb_eval_steps = 0
    for batch in test_dataloader:
        b_input_ids, b_labels = (t.to(device) for t in batch)
        # Drop the singleton dimension at index 1 and cast ids to int64.
        b_input_ids = b_input_ids.squeeze(1).long()
        with torch.no_grad():
            text_embedding = embedding(input_ids=b_input_ids)
            eval_accuracy += model.test(text_embedding, b_labels)
        nb_eval_steps += 1

    if nb_eval_steps == 0:
        # No batch was produced, so there is nothing to average; report it
        # instead of dividing by zero.
        logger.info("错误!请降低batch大小")
        return acc_records

    logger.info('准确率为:{:.2f}%'.format(100 * eval_accuracy / nb_eval_steps))
    acc_records.append(eval_accuracy / nb_eval_steps)
    return acc_records
예제 #2
0
def mymodel_pred(logger, text, the_time=my_time):
    """Predict, print and return the label for a single piece of text.

    Rebuilds the ELECTRA embedding and ``EmtClassifyModel`` from
    ``args.mymodel_config_dir``, restores both checkpoints whose file names
    are prefixed with ``the_time``, converts ``text`` to token ids with
    ``text2ids`` and maps the model's first prediction through
    ``args.label2emt``.

    Args:
        logger: Not used by this function; kept so the signature matches the
            sibling entry points.
        text: Input passed to ``text2ids`` together with the tokenizer.
        the_time: File-name prefix of the checkpoints to load.

    Returns:
        The ``args.label2emt`` entry for the predicted label (the same value
        that is printed).
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = EmtClassifyModel(config=config, args=args)

    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    embedding.load_state_dict(
        torch.load(os.path.join(embedding_dir, the_time + 'pytorch_model.bin')))
    classifier_path = os.path.join(args.mymodel_save_dir,
                                   the_time + "mymodel.bin")
    model.load_state_dict(torch.load(classifier_path))

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    tokenizer = ElectraTokenizer.from_pretrained(args.vocab_dir)
    # text2ids returns three values; only the token ids are needed here.
    input_ids, _, _ = text2ids(tokenizer, text, args.max_sent_len)
    input_ids = torch.Tensor(input_ids).to(device=device)
    # Drop the singleton dimension at index 1 and cast ids to int64.
    input_ids = input_ids.squeeze(1).long()
    with torch.no_grad():
        text_embedding = embedding(input_ids=input_ids)
        pred = model.get_guess(text_embedding)

    # Only the first prediction is reported.
    label = args.label2emt[label_from_output(pred[0].to('cpu')).item()]
    print(label)
    # Hand the label back as well so callers can use it programmatically.
    return label
예제 #3
0
def mymodel_cal(logger, test_dataloader, the_time=my_time):
    """Compute, print and return a confusion matrix for the saved classifier.

    Rebuilds the ELECTRA embedding and ``RelClassifyModel`` from
    ``args.mymodel_config_dir``, restores both checkpoints whose file names
    are prefixed with ``the_time``, and counts, for every sample, the pair
    (label from the batch, label derived from the model output).

    Args:
        logger: Logger used to report how many samples could not be counted;
            may be ``None``.
        test_dataloader: Iterable whose batches unpack into
            ``(input_ids1, input_ids2, labels)`` tensors.
        the_time: File-name prefix of the checkpoints to load.

    Returns:
        A ``numpy`` array of shape ``(n, n)`` with ``n = len(args.rel2label)``
        where ``result[true_label, predicted_label]`` is the sample count.
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = RelClassifyModel(config=config, args=args)

    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    embedding.load_state_dict(
        torch.load(os.path.join(embedding_dir, the_time + 'pytorch_model.bin')))
    classifier_path = os.path.join(args.mymodel_save_dir,
                                   the_time + "mymodel.bin")
    model.load_state_dict(torch.load(classifier_path))

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    target_size = len(args.rel2label)
    result = np.zeros([target_size, target_size])
    skipped = 0
    for batch in test_dataloader:
        b_input_ids1, b_input_ids2, b_labels = (t.to(device) for t in batch)
        # Drop the singleton dimension at index 1 and cast ids to int64.
        b_input_ids1 = b_input_ids1.squeeze(1).long()
        b_input_ids2 = b_input_ids2.squeeze(1).long()
        with torch.no_grad():
            text_embedding1 = embedding(input_ids=b_input_ids1)
            # The second input is embedded with token_type_ids of all ones.
            text_embedding2 = embedding(input_ids=b_input_ids2,
                                        token_type_ids=torch.ones(
                                            b_input_ids2.size(),
                                            dtype=torch.long,
                                            device=device))
            pred = model.get_guess(text_embedding1, text_embedding2)
        for i in range(pred.size()[0]):
            try:
                result[b_labels[i], label_from_output(pred[i])] += 1
            except Exception:
                # Counting stays best-effort (a sample that cannot be indexed
                # is skipped), but KeyboardInterrupt/SystemExit now propagate
                # and the number of dropped samples is tracked.
                skipped += 1

    if skipped and logger is not None:
        logger.warning(
            "mymodel_cal: skipped %d sample(s) that could not be counted",
            skipped)
    print(result)
    return result
예제 #4
0
    sent_ner_config = ElectraConfig.from_pretrained(args.sent_ner_model_config_dir)
    sent_ner_embedding = ElectraModel(config=sent_ner_config)
    sent_ner_model = NerModel(config=sent_ner_config)
    output_model_file = os.path.join(args.sent_ner_model_save_dir, 'embedding/')
    model_state_dict = torch.load(os.path.join(output_model_file, 'pytorch_model.bin'))
    sent_ner_embedding.load_state_dict(model_state_dict)
    output_model_file = os.path.join(args.sent_ner_model_save_dir, "mymodel.bin")
    model_state_dict = torch.load(output_model_file)
    sent_ner_model.load_state_dict(model_state_dict)

    text_classify_embedding.to(device)
    text_classify_model.to(device)
    sent_ner_embedding.to(device)
    sent_ner_model.to(device)

    text_classify_embedding.eval()
    text_classify_model.eval()
    sent_ner_embedding.eval()
    sent_ner_model.eval()

    text_classify_model = MyTextClassifyModel(text_classify_model)
    sent_ner_model = MyNerModel(sent_ner_model)
    text_classify = MyTextClassify(text_classify_embedding, text_classify_model, tokenizer)
    sent_ner = MySentNer(sent_ner_embedding, sent_ner_model, tokenizer)

    # 功能选择
    while True:
        choice = input("请选择功能,输入0退出;" '\n'
                       "输入1从网络抽取信息;" '\n'
                       "输入2从语料库抽取信息;" '\n'
                       "输入3将bio标注数据集转化成xml格式;" '\n'