def add_test(self, pred_batchesNER, true_batchesNER, pred_batchesREL, true_batchesREL):
    """Append the predicted relations and NER chunks of every example to text files.

    For each example, one line holding ``str()`` of the predicted token
    relations is appended to ``pred_output/output_rel.txt`` and one line
    holding ``str()`` of the predicted chunk set is appended to
    ``pred_output/output_ner.txt``.

    Args:
        pred_batchesNER: per-example predicted NER label sequences.
        true_batchesNER: accepted for signature compatibility; not used,
            because only predictions are written out.
        pred_batchesREL: per-example predicted relation outputs, indexed in
            parallel with ``pred_batchesNER``.
        true_batchesREL: accepted for signature compatibility; not used.

    Raises:
        ValueError: if ``self.ner_chunk_eval`` is neither
            ``"boundaries_type"`` nor ``"boundaries"``.
    """
    # The segmentation dictionary does not depend on the example, build it once.
    tagsNER = utils.getSegmentationDict(self.nerSegmentationTags)

    # Context managers guarantee both files are flushed and closed, even if
    # one of the helpers raises half-way through the batch.
    with open("pred_output/output_rel.txt", "a") as rel_file, \
            open("pred_output/output_ner.txt", "a") as ner_file:
        for batch_idx, predNER in enumerate(pred_batchesNER):
            ptoken_ids, _, _, phead_ids, plabel_names = utils.transformToInitialInput(
                pred_batchesREL[batch_idx], self.RELset)
            predRel = getTokenRelations(plabel_names, phead_ids, ptoken_ids)

            if self.ner_chunk_eval == "boundaries_type":
                lab_pred_chunks = set(get_chunks(predNER, tagsNER))
            elif self.ner_chunk_eval == "boundaries":
                lab_pred_chunks = set(
                    keepOnlyChunkBoundaries(set(get_chunks(predNER, tagsNER))))
            else:
                raise ValueError(
                    "unsupported ner_chunk_eval: {!r}".format(self.ner_chunk_eval))

            # One line per example in each output file.
            rel_file.write(str(predRel) + "\n")
            ner_file.write(str(lab_pred_chunks) + "\n")
def add(self, pred_batchesNER, true_batchesNER, pred_batchesREL, true_batchesREL,true_batchesBIONER):
    """Accumulate NER and relation counts for a batch using gold chunk boundaries.

    Chunk boundaries are taken from the gold BIO tags (``true_batchesBIONER``)
    for both the gold and the predicted side; the gold and predicted class
    tags are then mapped onto those boundaries with ``classesToChunks``.

    Updates, per example:
        * ``tpsClassesNER`` / ``fpsClassesNER`` / ``fnsClassesNER`` keyed by
          element ``[0]`` of each chunk,
        * ``tpsClassesREL`` / ``fpsClassesREL`` / ``fnsClassesREL`` keyed by
          element ``[1]`` of each relation chunk,
        * the aggregate ``correct_preds*``, ``total_preds*`` and
          ``total_correct*`` counters for NER and REL.

    Args:
        pred_batchesNER: per-example predicted NER class ids.
        true_batchesNER: per-example gold NER class ids.
        pred_batchesREL: per-example predicted relation outputs.
        true_batchesREL: per-example gold relation outputs.
        true_batchesBIONER: per-example gold BIO segmentation tags.
    """
    # Loop-invariant: depends only on the evaluator configuration.
    tagsNER = utils.getSegmentationDict(self.nerSegmentationTags)

    for batch_idx, predNER in enumerate(pred_batchesNER):
        trueNER = true_batchesNER[batch_idx]
        trueBIONER = true_batchesBIONER[batch_idx]

        ptoken_ids, _, _, phead_ids, plabel_names = utils.transformToInitialInput(
            pred_batchesREL[batch_idx], self.RELset)
        _, _, _, thead_ids, tlabel_names = utils.transformToInitialInput(
            true_batchesREL[batch_idx], self.RELset)

        # Both sides are resolved against the token ids returned for the
        # predicted relations, exactly as before.
        trueRel = getTokenRelations(tlabel_names, thead_ids, ptoken_ids)
        predRel = getTokenRelations(plabel_names, phead_ids, ptoken_ids)

        # Gold boundaries, shared by the gold and the predicted chunking.
        gold_boundaries = list(set(get_chunks(
            listOfTagsToids(trueBIONER, self.nerSegmentationTags), tagsNER)))

        lab_chunks = set(classesToChunks(
            listOfIdsToTags(trueNER, self.NERset), gold_boundaries))
        lab_pred_chunks = set(classesToChunks(
            listOfIdsToTags(predNER, self.NERset), gold_boundaries))
        lab_chunks_list = list(lab_chunks)
        lab_pred_chunks_list = list(lab_pred_chunks)

        # Per-class NER tallies; membership is tested against the sets
        # (O(1)) rather than the list copies.
        for chunk in lab_pred_chunks_list:
            if chunk in lab_chunks:
                self.tpsClassesNER[chunk[0]] += 1
            else:
                self.fpsClassesNER[chunk[0]] += 1
        for chunk in lab_chunks_list:
            if chunk not in lab_pred_chunks:
                self.fnsClassesNER[chunk[0]] += 1

        relTrue = set(relationChunks(
            trueRel, lab_chunks_list, relationTuple=self.rel_chunk_eval))
        relPred = set(relationChunks(
            predRel, lab_pred_chunks_list, relationTuple=self.rel_chunk_eval))

        # Per-class relation tallies.
        for rel in relPred:
            if rel in relTrue:
                self.tpsClassesREL[rel[1]] += 1
            else:
                self.fpsClassesREL[rel[1]] += 1
        for rel in relTrue:
            if rel not in relPred:
                self.fnsClassesREL[rel[1]] += 1

        # Aggregate counters over all classes.
        self.correct_predsNER += len(lab_chunks & lab_pred_chunks)
        self.total_predsNER += len(lab_pred_chunks)
        self.total_correctNER += len(lab_chunks)

        self.correct_predsREL += len(relTrue & relPred)
        self.total_predsREL += len(relPred)
        self.total_correctREL += len(relTrue)
def add(self, pred_batchesNER, true_batchesNER, pred_batchesREL, true_batchesREL):
    """Accumulate NER and relation counts for a batch of examples.

    NER chunks are extracted from the gold and predicted label sequences
    according to ``self.ner_chunk_eval``:

    * ``"boundaries_type"``: chunks are compared as returned by
      ``get_chunks`` and tallied per class in ``tpsClassesNER`` /
      ``fpsClassesNER`` / ``fnsClassesNER`` (keyed by element ``[0]``).
    * ``"boundaries"``: chunks are first reduced with
      ``keepOnlyChunkBoundaries`` and tallied in the scalar counters
      ``tpsNER`` / ``fpsNER`` / ``fnsNER``.

    Relation chunks are tallied per class (keyed by element ``[1]``) in
    ``tpsClassesREL`` / ``fpsClassesREL`` / ``fnsClassesREL``. When
    ``self.root_node`` equals ``True`` an extra ``(None, 0, 0)`` chunk is
    appended to both chunk lists before relation chunks are built. The
    aggregate ``correct_preds*``, ``total_preds*`` and ``total_correct*``
    counters are updated for both tasks.

    Args:
        pred_batchesNER: per-example predicted NER label sequences.
        true_batchesNER: per-example gold NER label sequences.
        pred_batchesREL: per-example predicted relation outputs.
        true_batchesREL: per-example gold relation outputs.

    Raises:
        ValueError: if ``self.ner_chunk_eval`` is neither
            ``"boundaries_type"`` nor ``"boundaries"``.
    """
    # Loop-invariant: depends only on the evaluator configuration.
    tagsNER = utils.getSegmentationDict(self.nerSegmentationTags)

    for batch_idx, predNER in enumerate(pred_batchesNER):
        trueNER = true_batchesNER[batch_idx]

        ptoken_ids, _, _, phead_ids, plabel_names = utils.transformToInitialInput(
            pred_batchesREL[batch_idx], self.RELset)
        _, _, _, thead_ids, tlabel_names = utils.transformToInitialInput(
            true_batchesREL[batch_idx], self.RELset)

        # Both sides are resolved against the token ids returned for the
        # predicted relations, exactly as before.
        trueRel = getTokenRelations(tlabel_names, thead_ids, ptoken_ids)
        predRel = getTokenRelations(plabel_names, phead_ids, ptoken_ids)

        if self.ner_chunk_eval == "boundaries_type":
            lab_chunks = set(get_chunks(trueNER, tagsNER))
            lab_pred_chunks = set(get_chunks(predNER, tagsNER))
        elif self.ner_chunk_eval == "boundaries":
            lab_chunks = set(
                keepOnlyChunkBoundaries(set(get_chunks(trueNER, tagsNER))))
            lab_pred_chunks = set(
                keepOnlyChunkBoundaries(set(get_chunks(predNER, tagsNER))))
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "unsupported ner_chunk_eval: {!r}".format(self.ner_chunk_eval))

        lab_chunks_list = list(lab_chunks)
        lab_pred_chunks_list = list(lab_pred_chunks)

        if self.ner_chunk_eval == "boundaries_type":
            # Per-class tallies; membership is tested against the sets
            # (O(1)) rather than the list copies.
            for chunk in lab_pred_chunks_list:
                if chunk in lab_chunks:
                    self.tpsClassesNER[chunk[0]] += 1
                else:
                    self.fpsClassesNER[chunk[0]] += 1
            for chunk in lab_chunks_list:
                if chunk not in lab_pred_chunks:
                    self.fnsClassesNER[chunk[0]] += 1
        else:
            # "boundaries": class-agnostic counts are plain set sizes.
            self.tpsNER += len(lab_pred_chunks & lab_chunks)
            self.fpsNER += len(lab_pred_chunks - lab_chunks)
            self.fnsNER += len(lab_chunks - lab_pred_chunks)

        # Comparison with ``== True`` is kept on purpose so that truthy
        # values other than True/1 keep behaving as they did before.
        if self.root_node == True:  # noqa: E712
            # Work on copies so the ROOT chunk never leaks into the NER lists.
            true_chunks_for_rel = copy.deepcopy(lab_chunks_list)
            true_chunks_for_rel.append((None, 0, 0))
            pred_chunks_for_rel = copy.deepcopy(lab_pred_chunks_list)
            pred_chunks_for_rel.append((None, 0, 0))
        else:
            true_chunks_for_rel = lab_chunks_list
            pred_chunks_for_rel = lab_pred_chunks_list

        relTrue = set(relationChunks(
            trueRel, true_chunks_for_rel, relationTuple=self.rel_chunk_eval))
        relPred = set(relationChunks(
            predRel, pred_chunks_for_rel, relationTuple=self.rel_chunk_eval))

        # Per-class relation tallies.
        for rel in relPred:
            if rel in relTrue:
                self.tpsClassesREL[rel[1]] += 1
            else:
                self.fpsClassesREL[rel[1]] += 1
        for rel in relTrue:
            if rel not in relPred:
                self.fnsClassesREL[rel[1]] += 1

        # Aggregate counters over all classes.
        self.correct_predsNER += len(lab_chunks & lab_pred_chunks)
        self.total_predsNER += len(lab_pred_chunks)
        self.total_correctNER += len(lab_chunks)

        self.correct_predsREL += len(relTrue & relPred)
        self.total_predsREL += len(relPred)
        self.total_correctREL += len(relTrue)
import utils