def mymodel_test(logger, test_dataloader, the_time=my_time):
    """Evaluate the saved emotion classifier on ``test_dataloader``.

    Rebuilds the ELECTRA embedding and the ``EmtClassifyModel`` head from the
    checkpoints under ``args.mymodel_save_dir`` whose file names start with
    ``the_time``, runs every batch with gradients disabled, and logs the mean
    of the per-batch accuracies returned by ``model.test``.

    Args:
        logger: Logger used to report the result via ``logger.info``.
        test_dataloader: Iterable of ``(input_ids, labels)`` tensor batches.
            ``input_ids`` is squeezed on dim 1 before use, so it presumably
            has shape (batch, 1, seq_len) -- TODO confirm against the dataset.
        the_time: Prefix of the checkpoint file names to load.

    Returns:
        A list holding the mean batch accuracy, or an empty list when the
        dataloader yielded no batches.
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = EmtClassifyModel(config=config, args=args)

    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    # Deserialize onto the CPU so that a checkpoint written on a GPU can still
    # be loaded on a CPU-only host; the modules are moved to `device` below.
    embedding_state = torch.load(
        os.path.join(embedding_dir, the_time + 'pytorch_model.bin'),
        map_location="cpu")
    embedding.load_state_dict(embedding_state)
    # `load` is a method of the project's classifier class (not visible here);
    # it receives the path of the classifier checkpoint.
    model.load(os.path.join(args.mymodel_save_dir, the_time + "mymodel.bin"))

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    acc_records = []
    eval_accuracy = 0
    nb_eval_steps = 0
    for batch in test_dataloader:
        b_input_ids, b_labels = tuple(t.to(device) for t in batch)
        b_input_ids = b_input_ids.squeeze(1).long()
        with torch.no_grad():
            text_embedding = embedding(input_ids=b_input_ids)
            tmp_eval_accuracy = model.test(text_embedding, b_labels)
        eval_accuracy += tmp_eval_accuracy
        nb_eval_steps += 1

    if nb_eval_steps == 0:
        # No batch was produced, so there is nothing to average.
        logger.info("错误!请降低batch大小")
        return acc_records

    logger.info('准确率为:{:.2f}%'.format(100 * eval_accuracy / nb_eval_steps))
    acc_records.append(eval_accuracy / nb_eval_steps)
    return acc_records
def mymodel_pred(logger, text, the_time=my_time):
    """Predict the emotion label of ``text`` and print it.

    Rebuilds the ELECTRA embedding and the ``EmtClassifyModel`` head from the
    checkpoints under ``args.mymodel_save_dir`` whose file names start with
    ``the_time``, converts ``text`` to ids with ``text2ids``, and prints the
    entry of ``args.label2emt`` selected by the model's first prediction.

    Args:
        logger: Accepted for signature parity with the sibling functions;
            not used by this function.
        text: Input passed to ``text2ids`` together with the tokenizer and
            ``args.max_sent_len``.
        the_time: Prefix of the checkpoint file names to load.

    Returns:
        None. The predicted label is written to stdout with ``print``.
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = EmtClassifyModel(config=config, args=args)

    # Deserialize onto the CPU so that checkpoints written on a GPU can still
    # be loaded on a CPU-only host; the modules are moved to `device` below.
    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    embedding_state = torch.load(
        os.path.join(embedding_dir, the_time + 'pytorch_model.bin'),
        map_location="cpu")
    embedding.load_state_dict(embedding_state)

    classifier_path = os.path.join(args.mymodel_save_dir,
                                   the_time + "mymodel.bin")
    classifier_state = torch.load(classifier_path, map_location="cpu")
    model.load_state_dict(classifier_state)

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    tokenizer = ElectraTokenizer.from_pretrained(args.vocab_dir)
    # Only the first of the three values returned by text2ids is needed.
    input_ids, _, _ = text2ids(tokenizer, text, args.max_sent_len)
    input_ids = torch.Tensor(input_ids).to(device=device)
    # Drop dim 1 and cast to integer ids, mirroring the batch handling used
    # by the evaluation function.
    input_ids = input_ids.squeeze(1).long()

    with torch.no_grad():
        text_embedding = embedding(input_ids=input_ids)
        pred = model.get_guess(text_embedding)

    print(args.label2emt[label_from_output(pred[0].to('cpu')).item()])
    return
def mymodel_cal(logger, test_dataloader, the_time=my_time):
    """Count (label, prediction) pairs of the relation classifier.

    Rebuilds the ELECTRA embedding and the ``RelClassifyModel`` head from the
    checkpoints under ``args.mymodel_save_dir`` whose file names start with
    ``the_time``, runs every batch with gradients disabled, and accumulates a
    square count matrix of side ``len(args.rel2label)``: the row index is the
    batch label, the column index is ``label_from_output(pred[i])``.

    Args:
        logger: Optional logger. When it is not ``None`` it receives a single
            warning with the number of samples that could not be counted.
        test_dataloader: Iterable of ``(input_ids1, input_ids2, labels)``
            tensor batches. Both id tensors are squeezed on dim 1, so they
            presumably have shape (batch, 1, seq_len) -- TODO confirm.
        the_time: Prefix of the checkpoint file names to load.

    Returns:
        The ``numpy`` count matrix, which is also printed to stdout.
    """
    config = ElectraConfig.from_pretrained(args.mymodel_config_dir)
    embedding = ElectraModel(config=config)
    model = RelClassifyModel(config=config, args=args)

    # Deserialize onto the CPU so that checkpoints written on a GPU can still
    # be loaded on a CPU-only host; the modules are moved to `device` below.
    embedding_dir = os.path.join(args.mymodel_save_dir, 'embedding/')
    embedding_state = torch.load(
        os.path.join(embedding_dir, the_time + 'pytorch_model.bin'),
        map_location="cpu")
    embedding.load_state_dict(embedding_state)

    classifier_path = os.path.join(args.mymodel_save_dir,
                                   the_time + "mymodel.bin")
    classifier_state = torch.load(classifier_path, map_location="cpu")
    model.load_state_dict(classifier_state)

    if args.fp16:
        embedding.half()
        model.half()
    embedding.to(device)
    model.to(device)
    embedding.eval()
    model.eval()

    target_size = len(args.rel2label)
    result = np.zeros([target_size, target_size])
    skipped = 0
    last_error = None

    for batch in test_dataloader:
        b_input_ids1, b_input_ids2, b_labels = tuple(
            t.to(device) for t in batch)
        b_input_ids1 = b_input_ids1.squeeze(1).long()
        b_input_ids2 = b_input_ids2.squeeze(1).long()

        with torch.no_grad():
            text_embedding1 = embedding(input_ids=b_input_ids1)
            # The second input is encoded with token_type_ids of all ones.
            text_embedding2 = embedding(
                input_ids=b_input_ids2,
                token_type_ids=torch.ones(
                    b_input_ids2.size(), dtype=torch.long, device=device))
            pred = model.get_guess(text_embedding1, text_embedding2)

        for i in range(pred.size()[0]):
            try:
                result[b_labels[i], label_from_output(pred[i])] += 1
            except Exception as exc:
                # Best effort: a sample that cannot be counted is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed and the
                # number of dropped samples is reported below.
                skipped += 1
                last_error = exc

    if skipped and logger is not None:
        logger.warning(
            "mymodel_cal: skipped %d sample(s) that could not be counted "
            "(last error: %r)", skipped, last_error)

    print(result)
    return result
sent_ner_config = ElectraConfig.from_pretrained(args.sent_ner_model_config_dir) sent_ner_embedding = ElectraModel(config=sent_ner_config) sent_ner_model = NerModel(config=sent_ner_config) output_model_file = os.path.join(args.sent_ner_model_save_dir, 'embedding/') model_state_dict = torch.load(os.path.join(output_model_file, 'pytorch_model.bin')) sent_ner_embedding.load_state_dict(model_state_dict) output_model_file = os.path.join(args.sent_ner_model_save_dir, "mymodel.bin") model_state_dict = torch.load(output_model_file) sent_ner_model.load_state_dict(model_state_dict) text_classify_embedding.to(device) text_classify_model.to(device) sent_ner_embedding.to(device) sent_ner_model.to(device) text_classify_embedding.eval() text_classify_model.eval() sent_ner_embedding.eval() sent_ner_model.eval() text_classify_model = MyTextClassifyModel(text_classify_model) sent_ner_model = MyNerModel(sent_ner_model) text_classify = MyTextClassify(text_classify_embedding, text_classify_model, tokenizer) sent_ner = MySentNer(sent_ner_embedding, sent_ner_model, tokenizer) # 功能选择 while True: choice = input("请选择功能,输入0退出;" '\n' "输入1从网络抽取信息;" '\n' "输入2从语料库抽取信息;" '\n' "输入3将bio标注数据集转化成xml格式;" '\n'