def get_prediction(model, sentence, save_dir, mark='Eval', verbose=False):
    """Run inference on a single encoded sentence and return the predicted
    intent and slot labels."""
    # set model to evaluation mode
    model.eval()

    idx2lbl = load_obj(save_dir + "idx2lbl.json")
    idx2cls = load_obj(save_dir + "idx2cls.json")

    enc = sentence.to(device)
    enc_self_attn_mask = get_attn_pad_mask(enc, enc).to(device)  # .to() is not in-place, so keep the result

    # get results from model
    logits_tgt, logits_clsf = model(enc, enc_self_attn_mask)

    # number of PAD tokens in the sentence (assumes at least one PAD is present)
    pad_num = enc.data.eq(0).sum(axis=1)

    # intent: argmax over the classification logits
    score_cls, cls_idx = torch.max(logits_clsf, dim=-1)
    pred_cls = cls_idx[0].data.tolist()

    # keep only the slot labels that are valid for the predicted intent
    idx_mask = load_mask(save_dir)
    masked_logits_tgt = softmax_mask(logits_tgt, cls_idx, idx_mask)

    # slots: argmax over the masked slot logits, trimmed to the unpadded length
    score_tgt, tgt_idx = torch.max(masked_logits_tgt, dim=-1)
    pred_tags = tgt_idx[0, 0:-pad_num].data.tolist()
    pred_lbls = [idx2lbl[str(idx)] for idx in pred_tags]
    pred_cls = idx2cls[str(pred_cls)]
    return pred_cls, pred_lbls
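# For reference, a plausible sketch of what softmax_mask does above, assuming
# idx_mask maps each intent index (as a string) to a 0/1 validity vector over
# the slot-label vocabulary. This illustrates the masking idea only; the
# project's own implementation may differ.
def softmax_mask_sketch(logits_tgt, cls_idx, idx_mask):
    # logits_tgt: [batch, seq_len, n_slots]; cls_idx: [batch]
    valid = torch.tensor([idx_mask[str(int(c))] for c in cls_idx],
                         dtype=torch.bool, device=logits_tgt.device)  # [batch, n_slots]
    valid = valid.unsqueeze(1)  # broadcast over seq_len
    # invalid slots get -inf so they can never win the argmax
    return logits_tgt.masked_fill(~valid, float('-inf'))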
# Run prediction on the test sentence, then format and print the response
pred_cls, pred_lbls = get_prediction(model, test_data, args.save_dir, mark='Test', verbose=True)
pretty_print(tokens, pred_lbls, pred_cls)

# Export the trained model to ONNX
import onnx

enc = test_data.to(device)
enc_self_attn_mask = get_attn_pad_mask(enc, enc).to(device)  # .to() is not in-place, so keep the result
x = (enc, enc_self_attn_mask)

# run the model once in PyTorch to get reference outputs for the export
logits_tgt, logits_clsf = model(enc, enc_self_attn_mask)

torch.onnx.export(
    model,                                    # model being run
    x,                                        # model input (a tuple for multiple inputs)
    args.onnx_dir + "transformer_mix.onnx",   # where to save the model (file or file-like object)
    export_params=True,                       # store the trained parameter weights inside the model file
    opset_version=10,                         # the ONNX version to export the model to
    do_constant_folding=True,                 # execute constant folding for optimization
)
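# Sanity-check the exported file. onnx.checker validates the graph structure;
# the onnxruntime comparison below is an optional extra and assumes the
# onnxruntime package is installed.
onnx_model = onnx.load(args.onnx_dir + "transformer_mix.onnx")
onnx.checker.check_model(onnx_model)

import onnxruntime
import numpy as np

sess = onnxruntime.InferenceSession(args.onnx_dir + "transformer_mix.onnx")
# no input_names were given to the export, so read back the auto-assigned names
ort_inputs = {inp.name: t.cpu().numpy() for inp, t in zip(sess.get_inputs(), x)}
ort_logits_tgt, ort_logits_clsf = sess.run(None, ort_inputs)
# the ONNX outputs should closely match the PyTorch ones computed above
np.testing.assert_allclose(logits_clsf.detach().cpu().numpy(), ort_logits_clsf,
                           rtol=1e-03, atol=1e-05)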
def evaluate_f1_no_mask(model, dl_test, save_dir,
                        criterion_clsf=nn.CrossEntropyLoss().to(device),
                        criterion_tgt=nn.CrossEntropyLoss(ignore_index=PAD).to(device),
                        verbose=False):
    """Evaluate intent and slot F1 on the test set without intent-conditioned
    slot masking."""
    loss_test = 0
    pred_tags = []
    true_tags = []
    pred_clss = []
    true_clss = []
    idx2lbl = load_obj(save_dir + 'idx2lbl.json')

    model.eval()
    for enc, tgt, cls in dl_test:
        with torch.no_grad():
            enc = enc.to(device)
            tgt = tgt.to(device)
            cls = cls.to(device)
            enc_self_attn_mask = get_attn_pad_mask(enc, enc).to(device)

            logits_tgt, logits_clsf = model(enc, enc_self_attn_mask)
            loss_tgt = criterion_tgt(logits_tgt.transpose(1, 2), tgt)  # slot tagging loss
            loss_tgt = loss_tgt.float().mean()
            loss_clsf = criterion_clsf(logits_clsf, cls)               # intent classification loss
            loss = loss_clsf + loss_tgt
            loss_test += loss

            # number of PAD tokens per sentence, used to trim predictions
            pad_nums = enc.data.eq(0).sum(axis=1)
            score_tgt, tgt_idx = torch.max(logits_tgt, dim=-1)
            score_cls, cls_idx = torch.max(logits_clsf, dim=-1)

            for pre, true, pad_num in zip(tgt_idx, tgt, pad_nums):
                pred_tags += pre[0:-pad_num].data.tolist()
                true_tags += true[0:-pad_num].data.tolist()
            pred_clss += cls_idx.tolist()
            true_clss += cls.tolist()

    assert len(pred_tags) == len(true_tags)
    assert len(pred_clss) == len(true_clss)

    f1_tgt = f1_score(pred_tags, true_tags, average='micro')
    f1_cls = f1_score(pred_clss, true_clss, average='micro')

    # map tag indices back to label strings for the merged (entity-level) score
    true_lbls = []
    pred_lbls = []
    for t, p in zip(true_tags, pred_tags):
        true_lbls.append(idx2lbl[str(t)])
        pred_lbls.append(idx2lbl[str(p)])
    f1_tgt_merged = f1_score_merged(true_lbls, pred_lbls)

    if verbose:
        report = classification_report(true_lbls, pred_lbls)
        print("============no_mask_slot================")
        print(report, flush=True)

    return loss_test / len(dl_test), f1_cls * 100, f1_tgt * 100, f1_tgt_merged
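# Example call, assuming dl_test yields (enc, tgt, cls) batches as above
# (variable names are illustrative; adjust to the training script's setup):
# loss, f1_cls, f1_tgt, f1_merged = evaluate_f1_no_mask(model, dl_test, args.save_dir, verbose=True)
# print('loss={0:.4f} intent_f1={1:.2f} slot_f1={2:.2f} merged_f1={3:.2f}'.format(
#     loss, f1_cls, f1_tgt, f1_merged))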
def generate_report_txt(model, dl_test, save_dir,
                        criterion_clsf=nn.CrossEntropyLoss().to(device),
                        criterion_tgt=nn.CrossEntropyLoss(ignore_index=PAD).to(device),
                        verbose=False):
    """Run masked prediction on the test set and write per-sentence reports
    (correct / intent error / slot error) plus a score summary to disk."""
    loss_test = 0
    pred_tags = []
    true_tags = []
    pred_clss = []
    true_clss = []
    idx2lbl = load_obj(save_dir + 'idx2lbl.json')
    idx2cls = load_obj(save_dir + "idx2cls.json")
    sents = load_obj(save_dir + "TestDataSentence.txt")
    idx_mask = load_mask(save_dir)  # valid-slot mask per intent; load once, not per batch

    model.eval()
    for enc, tgt, cls in dl_test:
        with torch.no_grad():
            enc = enc.to(device)
            tgt = tgt.to(device)
            cls = cls.to(device)
            enc_self_attn_mask = get_attn_pad_mask(enc, enc).to(device)

            logits_tgt, logits_clsf = model(enc, enc_self_attn_mask)
            loss_tgt = criterion_tgt(logits_tgt.transpose(1, 2), tgt)  # slot tagging loss
            loss_tgt = loss_tgt.float().mean()
            loss_clsf = criterion_clsf(logits_clsf, cls)               # intent classification loss
            loss = loss_clsf + loss_tgt
            loss_test += loss

            pad_nums = enc.data.eq(0).sum(axis=1)
            score_cls, cls_idx = torch.max(logits_clsf, dim=-1)

            # keep only the slot labels that are valid for the predicted intent
            masked_logits_tgt = softmax_mask(logits_tgt, cls_idx, idx_mask)
            score_tgt, tgt_idx = torch.max(masked_logits_tgt, dim=-1)

            for pre, true, pad_num in zip(tgt_idx, tgt, pad_nums):
                pred_tags.append(pre[0:-pad_num].data.tolist())
                true_tags.append(true[0:-pad_num].data.tolist())
            pred_clss += cls_idx.tolist()
            true_clss += cls.tolist()

    print("Prediction completed", flush=True)

    lines_correct = []
    lines_intent_error = []
    lines_slot_error = []
    for idx in range(len(true_clss)):
        tokens = sents[idx].split(' ')
        true_lbls = []
        pred_lbls = []
        for t, p in zip(true_tags[idx], pred_tags[idx]):
            true_lbls.append(idx2lbl[str(t)])
            pred_lbls.append(idx2lbl[str(p)])

        # chunk the BIO label sequences into (tag, start, end) entities
        # (see the get_entities example after this function)
        true_entities = get_entities(true_lbls)
        pred_entities = get_entities(pred_lbls)

        # format each side's entities independently, so a missing or spurious
        # entity still shows up as a mismatch below
        slots_true = []
        for tag, start, end in true_entities:
            tok = ''.join(tokens[start:end + 1])
            slots_true.append('<{0}>: {1}'.format(tag, tok))
        slots_pred = []
        for tag, start, end in pred_entities:
            tok = ''.join(tokens[start:end + 1])
            slots_pred.append('<{0}>: {1}'.format(tag, tok))

        intent_true = idx2cls[str(true_clss[idx])]
        intent_pred = idx2cls[str(pred_clss[idx])]
        line = "Sentence:{0}\nExpect: \t{1}\t{2}\nPredict:\t{3}\t{4}\n".format(
            sents[idx], intent_true, slots_true, intent_pred, slots_pred)
        if intent_true != intent_pred:
            lines_intent_error.append(line)
        elif slots_true != slots_pred:
            lines_slot_error.append(line)
        else:
            lines_correct.append(line)

    correct_num = len(lines_correct)
    intent_w_num = len(lines_intent_error)
    slot_w_num = len(lines_slot_error)
    total_line = correct_num + intent_w_num + slot_w_num
    score1 = 'total line = {0}; Exact match = {1}, with intent fail = {2}, with slot fail = {3};'.format(
        total_line, correct_num, intent_w_num, slot_w_num)
    score2 = 'Accuracy = {0:.4f}'.format(correct_num / total_line)
    scores = [score1, score2]

    # saving reports
    print("Saving reports...", flush=True)
    report_dir = os.path.join(save_dir, 'reports', '')
    create_dir(report_dir)
    remove_old_file(report_dir + 'reports_correct.txt')
    remove_old_file(report_dir + 'reports_intent_error.txt')
    remove_old_file(report_dir + 'reports_slot_error.txt')
    remove_old_file(report_dir + 'scores.txt')
    with open(report_dir + 'reports_correct.txt', 'w', encoding='utf-8') as f:
        for line in lines_correct:
            f.write(line + '\n')
    with open(report_dir + 'reports_intent_error.txt', 'w', encoding='utf-8') as f:
        for line in lines_intent_error:
            f.write(line + '\n')
    with open(report_dir + 'reports_slot_error.txt', 'w', encoding='utf-8') as f:
        for line in lines_slot_error:
            f.write(line + '\n')
    with open(report_dir + 'scores.txt', 'w', encoding='utf-8') as f:
        for line in scores:
            f.write(line + '\n')
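# For reference, the entity chunking used in generate_report_txt. If
# get_entities here is seqeval's, it turns a BIO label sequence into
# (tag, start, end) tuples with inclusive indices, e.g.:
#
#   get_entities(['B-artist', 'I-artist', 'O', 'B-genre'])
#   # -> [('artist', 0, 1), ('genre', 3, 3)]
#
# which is why the report code joins tokens[start:end + 1] for each chunk.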