def validate_per_intent(model_path, dataset_path, batch_size):
    """Score a saved model on the test split, overall and per gold intent.

    Args:
        model_path: Path to a torch-serialized model checkpoint.
        dataset_path: Path to a torch-serialized dataset object.
        batch_size: Batch size used for prediction.

    Returns:
        Dict mapping 'overall' and each gold intent label to a list
        ``[slot_f1, intent_acc, sentence_acc, num_samples]``.
    """
    model = torch.load(model_path, map_location='cpu')
    dataset = torch.load(dataset_path, map_location='cpu')

    pred_slot, real_slot, exp_pred_intent, real_intent, pred_intent = Processor.prediction(
        model, dataset, "test", batch_size)

    # Group every prediction tuple by its gold intent label.  Using a nested
    # defaultdict avoids the manual per-key list initialization.
    all_data = defaultdict(lambda: defaultdict(list))
    for ps, rs, epi, ri, pi in zip(pred_slot, real_slot, exp_pred_intent,
                                   real_intent, pred_intent):
        all_data[ri]['pred_slot'].append(ps)
        all_data[ri]['real_slot'].append(rs)
        all_data[ri]['exp_pred_intent'].append(epi)
        all_data[ri]['real_intent'].append(ri)
        all_data[ri]['pred_intent'].append(pi)

    result_per_intent = defaultdict(list)
    result_per_intent['overall'] = [
        miulab.computeF1Score(pred_slot, real_slot)[0],
        Evaluator.accuracy(exp_pred_intent, real_intent),
        Evaluator.semantic_acc(pred_slot, real_slot, exp_pred_intent,
                               real_intent),
        len(real_intent),
    ]

    # Per-intent scores computed over only the samples with that gold intent.
    # Distinct local names so the overall lists above are never clobbered.
    for intent, data in all_data.items():
        result_per_intent[intent] = [
            miulab.computeF1Score(data['pred_slot'], data['real_slot'])[0],
            Evaluator.accuracy(data['exp_pred_intent'], data['real_intent']),
            Evaluator.semantic_acc(data['pred_slot'], data['real_slot'],
                                   data['exp_pred_intent'],
                                   data['real_intent']),
            len(data['exp_pred_intent']),
        ]
    return result_per_intent
def validate(model_path, dataset_path, batch_size):
    """Score a saved model on the test split and dump mispredicted samples.

    Writes three files under ``<dataset.save_dir>/error``:
      * ``slot.txt``   - samples with at least one wrong slot tag
      * ``intent.txt`` - samples with a wrong intent prediction
      * ``both.txt``   - samples with a slot error or an intent error

    Args:
        model_path: Path to a torch-serialized model checkpoint.
        dataset_path: Path to a torch-serialized dataset object.
        batch_size: Batch size used for prediction.

    Returns:
        Tuple ``(slot_f1, intent_acc, sent_acc)``.
    """
    model = torch.load(model_path)
    dataset = torch.load(dataset_path)

    # Sentence (token-list) list of the test dataset.
    sent_list = dataset.test_sentence
    pred_slot, real_slot, exp_pred_intent, real_intent, pred_intent = Processor.prediction(
        model, dataset, "test", batch_size)

    # Make sure the error-prediction directory exists.  makedirs with
    # exist_ok avoids the exists/mkdir race and creates missing parents.
    mistake_dir = os.path.join(dataset.save_dir, "error")
    os.makedirs(mistake_dir, exist_ok=True)

    slot_file_path = os.path.join(mistake_dir, "slot.txt")
    intent_file_path = os.path.join(mistake_dir, "intent.txt")
    both_file_path = os.path.join(mistake_dir, "both.txt")

    # Samples with mistaken slot predictions: word, gold tag, predicted tag.
    with open(slot_file_path, 'w') as fw:
        for w_list, r_slot_list, p_slot_list in zip(sent_list, real_slot,
                                                    pred_slot):
            if r_slot_list != p_slot_list:
                for w, r, p in zip(w_list, r_slot_list, p_slot_list):
                    fw.write(w + '\t' + r + '\t' + p + '\n')
                fw.write('\n')

    # Samples with mistaken intent predictions: per-token intents, then the
    # gold intent and the final (voted) predicted intent.
    with open(intent_file_path, 'w') as fw:
        for w_list, p_intent_list, r_intent, p_intent in zip(
                sent_list, pred_intent, real_intent, exp_pred_intent):
            if p_intent != r_intent:
                for w, p in zip(w_list, p_intent_list):
                    fw.write(w + '\t' + p + '\n')
                fw.write(r_intent + '\t' + p_intent + '\n\n')

    # Samples with either a slot error or an intent error.
    with open(both_file_path, 'w') as fw:
        for w_list, r_slot_list, p_slot_list, p_intent_list, r_intent, p_intent in \
                zip(sent_list, real_slot, pred_slot, pred_intent, real_intent,
                    exp_pred_intent):
            if r_slot_list != p_slot_list or r_intent != p_intent:
                for w, r_slot, p_slot, p_intent_ in zip(
                        w_list, r_slot_list, p_slot_list, p_intent_list):
                    fw.write(w + '\t' + r_slot + '\t' + p_slot + '\t' +
                             p_intent_ + '\n')
                fw.write(r_intent + '\t' + p_intent + '\n\n')

    slot_f1 = miulab.computeF1Score(pred_slot, real_slot)[0]
    intent_acc = Evaluator.accuracy(exp_pred_intent, real_intent)
    sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, exp_pred_intent,
                                      real_intent)
    return slot_f1, intent_acc, sent_acc
def validate(model_path, dataset, batch_size, num_intent, args):
    """Load a checkpoint, score it on the test split, and dump predictions.

    Args:
        model_path: Path to a torch-serialized model checkpoint.
        dataset: Dataset object exposing ``save_dir`` and ``intent_alphabet``.
        batch_size: Batch size used for prediction.
        num_intent: Number of intent classes for the one-hot intent F1.
        args: Namespace providing at least ``gpu`` and ``save_dir``.

    Returns:
        Tuple ``(slot_f1, intent_f1_macro, intent_acc, sent_acc)``.
    """
    if args.gpu:
        model = torch.load(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device('cpu'))

    ss, pred_slot, real_slot, pred_intent, real_intent = Processor.prediction(
        model, dataset, "test", batch_size, args)

    # Ensure the error-prediction directory exists.  makedirs with exist_ok
    # avoids the exists/mkdir race and creates missing parents.
    # NOTE(review): this directory is created but nothing below writes into
    # it (error.txt goes to args.save_dir) — confirm whether it is still
    # needed by callers before removing.
    mistake_dir = os.path.join(dataset.save_dir, "error")
    os.makedirs(mistake_dir, exist_ok=True)

    slot_f1_score = miulab.computeF1Score(ss, real_slot, pred_slot, args)[0]
    # Macro-averaged intent F1 over one-hot (multi-label) intent encodings.
    intent_f1_score = f1_score(
        instance2onehot(dataset.intent_alphabet.get_index, num_intent,
                        real_intent),
        instance2onehot(dataset.intent_alphabet.get_index, num_intent,
                        pred_intent),
        average='macro')
    intent_acc_score = Evaluator.intent_acc(pred_intent, real_intent)
    sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                      real_intent)
    print(
        "slot f1: {}, intent f1: {}, intent acc: {}, exact acc: {}".format(
            slot_f1_score, intent_f1_score, intent_acc_score, sent_acc))

    # Dump predicted vs. gold intents and slots for every sample.
    with open(os.path.join(args.save_dir, 'error.txt'), 'w',
              encoding="utf8") as fw:
        for p_slot_list, r_slot_list, p_intent_list, r_intent in \
                zip(pred_slot, real_slot, pred_intent, real_intent):
            fw.write(','.join(p_intent_list) + '\t' + ','.join(r_intent) +
                     '\n')
            for w, r_slot in zip(p_slot_list, r_slot_list):
                fw.write(w + '\t' + r_slot + '\t\n')
            fw.write('\n\n')
    return slot_f1_score, intent_f1_score, intent_acc_score, sent_acc
def estimate(self, if_dev, test_batch=100):
    """Evaluate the model on the dev split (``if_dev``) or the test split.

    Returns:
        Tuple ``(slot_f1, intent_acc, sent_acc)``.
    """
    split_name = "dev" if if_dev else "test"
    pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
        self.__model, self.__dataset, split_name, test_batch)

    slot_f1_socre = miulab.computeF1Score(pred_slot, real_slot)[0]
    intent_acc = Evaluator.accuracy(pred_intent, real_intent)
    sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                      real_intent)
    return slot_f1_socre, intent_acc, sent_acc
def estimate(self, if_dev, test_batch=100):
    """Estimate the performance of model on dev or test dataset.

    Bug fix: the split names were swapped — ``if_dev=True`` previously
    evaluated on "test" and ``if_dev=False`` on "dev", contradicting the
    parameter's meaning.

    Returns:
        Tuple ``(slot_f1, intent_acc, sent_acc)``.
    """
    if if_dev:
        pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
            self.__model, self.__dataset, "dev", test_batch)
    else:
        pred_slot, real_slot, pred_intent, real_intent, _ = self.prediction(
            self.__model, self.__dataset, "test", test_batch)

    # Used to compute the slot F1 score.
    slot_f1_socre = miulab.computeF1Score(pred_slot, real_slot)[0]
    intent_acc = Evaluator.accuracy(pred_intent, real_intent)
    sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                      real_intent)
    return slot_f1_socre, intent_acc, sent_acc
def estimate(self, if_dev, args, test_batch=100):
    """Estimate model performance on the dev or test split.

    Also dumps every evaluated sample (predicted vs. gold intents and
    slots) to ``<args.save_dir>/error.txt``.

    Returns:
        Tuple ``(slot_f1, intent_f1_macro, intent_acc, sent_acc)``.
    """
    split = "dev" if if_dev else "test"
    ss, pred_slot, real_slot, pred_intent, real_intent = self.prediction(
        self.__model, self.__dataset, split, test_batch, args)

    num_intent = len(self.__dataset.intent_alphabet)
    to_index = self.__dataset.intent_alphabet.get_index
    slot_f1_score = miulab.computeF1Score(ss, real_slot, pred_slot, args)[0]
    intent_f1_score = f1_score(
        instance2onehot(to_index, num_intent, real_intent),
        instance2onehot(to_index, num_intent, pred_intent),
        average='macro')
    intent_acc_score = Evaluator.intent_acc(pred_intent, real_intent)
    sent_acc = Evaluator.semantic_acc(pred_slot, real_slot, pred_intent,
                                      real_intent)
    print(
        "slot f1: {}, intent f1: {}, intent acc: {}, exact acc: {}".format(
            slot_f1_score, intent_f1_score, intent_acc_score, sent_acc))

    # Write out every sample's predictions next to the gold labels.
    error_path = os.path.join(args.save_dir, 'error.txt')
    with open(error_path, 'w', encoding="utf8") as fw:
        for p_slots, r_slots, p_intents, gold_intent in zip(
                pred_slot, real_slot, pred_intent, real_intent):
            fw.write(','.join(p_intents) + '\t' + ','.join(gold_intent) +
                     '\n')
            for token, gold_slot in zip(p_slots, r_slots):
                fw.write(token + '\t' + gold_slot + '\t\n')
            fw.write('\n\n')
    return slot_f1_score, intent_f1_score, intent_acc_score, sent_acc
def get_f1(pred_list, real_list):
    """Return the F1 component of ``computeF1Score``.

    Note the argument order: ``computeF1Score`` expects the gold labels
    first, so the parameters are swapped on the way in.
    """
    scores = computeF1Score(real_list, pred_list)
    return scores[0]