def start():
    """Evaluate a trained Bert_CRF model on the validation set.

    Loads saved weights from ``args.output_dir``, runs the "valid"
    iterator under ``torch.no_grad()``, accumulates the CRF loss,
    collects predictions/labels, and prints per-step and final
    entity-level precision / recall / F1 via ``score_predict``.
    """
    # produce_data()
    model = Bert_CRF()
    model.load_state_dict(load_model(args.output_dir))
    # Resolve the target device once (the original computed the exact
    # same expression twice).
    device = torch.device(
        args.device if torch.cuda.is_available() and not args.no_cuda
        else "cpu")
    model.to(device)

    print('create_iter')
    eval_iter = create_batch_iter("valid")
    print('create_iter finished')

    # ----------------------- validation -----------------------
    model.eval()
    count = 0
    y_predicts, y_labels = [], []
    eval_loss, eval_acc, eval_f1 = 0, 0, 0
    with torch.no_grad():
        for step, batch in enumerate(eval_iter):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, output_mask = batch

            bert_encode = model(input_ids, segment_ids, input_mask).cpu()
            eval_los = model.loss_fn(bert_encode=bert_encode,
                                     tags=label_ids,
                                     output_mask=output_mask)
            # Running sum; printed below as a running average.
            eval_loss = eval_los + eval_loss
            count += 1

            predicts = model.predict(bert_encode, output_mask)
            y_predicts.append(predicts)
            y_labels.append(label_ids)

            entity_precision, entity_recall, entity_f1 = score_predict(
                label_ids.cpu(), predicts.cpu())
            print(
                '\n step :%d - eval_loss: %4f - ent_p:%4f - ent_r:%4f - ent_f1:%4f\n'
                % (step, eval_loss.item() / count, entity_precision,
                   entity_recall, entity_f1))

            # Sanity check: strip padding with the SAME mask on both
            # tensors.  The original filtered labels by ``label != -1``
            # and predictions by ``predict != -1`` independently, so a
            # single mismatched pad position made the lengths differ and
            # the assert fail spuriously.
            flat_labels = label_ids.view(-1)
            flat_predicts = predicts.view(-1)
            valid = flat_labels != -1
            assert len(flat_labels[valid]) == len(flat_predicts[valid])

    eval_predicted = torch.cat(y_predicts, dim=0).cpu()
    eval_labeled = torch.cat(y_labels, dim=0).cpu()
    entity_precision, entity_recall, entity_f1 = score_predict(
        eval_labeled, eval_predicted)
    print(
        '\n\n- eval_loss: %4f - eval_acc:%4f - eval_f1:%4f - ent_p:%4f - ent_r:%4f - ent_f1:%4f\n'
        % (eval_loss.item() / count, eval_acc, eval_f1, entity_precision,
           entity_recall, entity_f1))
def __init__(self):
    """Load the tokenizer, label map and trained Bert_CRF model once.

    All heavy resources (processor, tokenizers, model weights, device)
    are created here and cached on the instance so that later calls on
    this object are cheap.
    """
    print('[INFO]加载分词器')
    self.processor, self.bertTokenizer = init_params()
    labels = self.processor.get_labels()
    # Map each label string to its index in the label list.
    self.label_map = dict(zip(labels, range(len(labels))))
    self.tokenizer = BasicTokenizer()
    print('[INFO]分词器加载完毕')

    print('[INFO]加载模型')
    self.model = Bert_CRF()
    self.model.load_state_dict(load_model(args.output_dir))
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    self.device = torch.device(args.device if use_cuda else "cpu")
    self.model.to(self.device)
    # Inference-only object: switch off dropout/batch-norm updates.
    self.model.eval()
    print('[INFO]模型加载完毕')
def start():
    """Training entry point: build the batch iterators, construct the
    Bert_CRF model, set up a progress bar and delegate to ``fit``.
    """
    # produce_data()
    model = Bert_CRF()

    print('create_iter')
    train_iter, num_train_steps = create_batch_iter("train")
    eval_iter = create_batch_iter("valid")
    print('create_iter finished')

    # Samples per epoch, recovered from the total optimizer-step count.
    samples_per_epoch = (num_train_steps * args.train_batch_size
                         * args.gradient_accumulation_steps
                         / args.num_train_epochs)
    pbar = ProgressBar(epoch_size=samples_per_epoch,
                       batch_size=args.train_batch_size)

    print('fit')
    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)