def span_prf1_type_map(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    type_map: Optional[Mapping[str, str]] = None,
) -> Dict[str, PRF1]:
    """Score test entities against reference entities, per label and overall.

    Documents are paired by position: the i-th reference document is compared
    with the i-th test document. Within each pair, every test entity counts as
    a true positive or a false positive for its label, and every reference
    entity without a counterpart counts as a false negative for its label.

    Args:
        reference_docs: Documents carrying the gold entities in ``.ents``.
        test_docs: Documents carrying the predicted entities in ``.ents``;
            indexed with the positions of ``reference_docs``.
        type_map: When not ``None``, both entity sets are passed through
            ``remapping`` before comparison (presumably to relabel entity
            types -- confirm against that helper).

    Returns:
        A dict from label to ``PRF1``; ``get_prf1_all`` is called last to add
        its aggregate entry to the same dict.
    """
    tallies = defaultdict(lambda: defaultdict(int))

    for index, reference_doc in enumerate(reference_docs):
        gold_ents = set(reference_doc.ents)
        predicted_ents = set(test_docs[index].ents)

        if type_map is not None:
            gold_ents = remapping(gold_ents, type_map)
            predicted_ents = remapping(predicted_ents, type_map)

        # Each prediction is either matched by a gold entity or spurious.
        for predicted in predicted_ents:
            outcome = "tp" if is_ent_in_list(predicted, gold_ents) else "fp"
            tallies[predicted.label_][outcome] += 1

        # Gold entities that no prediction matched were missed.
        for gold in gold_ents:
            if is_ent_in_list(gold, predicted_ents):
                continue
            tallies[gold.label_]["fn"] += 1

    scores = {}
    for label, tally in tallies.items():
        precision = get_precision(tally["tp"], tally["fp"])
        recall = get_recall(tally["tp"], tally["fn"])
        scores[label] = PRF1(precision, recall, get_f1(precision, recall))

    get_prf1_all(tallies, scores)
    return scores
def get_prf1_all(counts, prf1):
    """Add the aggregate score over all labels to ``prf1`` in place.

    Args:
        counts: Mapping from label to a mapping holding the ``"tp"``,
            ``"fp"`` and ``"fn"`` tallies for that label.
        prf1: Result dict to update; the aggregate ``PRF1`` is stored under
            the empty-string key ``""``.

    Returns:
        None. The result is written into ``prf1``.
    """
    totals = {"tp": 0, "fp": 0, "fn": 0}
    for tally in counts.values():
        # Read tp, fp, fn in that order for every label.
        for outcome in totals:
            totals[outcome] += tally[outcome]

    overall_precision = get_precision(totals["tp"], totals["fp"])
    overall_recall = get_recall(totals["tp"], totals["fn"])
    overall_f1 = get_f1(overall_precision, overall_recall)
    prf1[""] = PRF1(overall_precision, overall_recall, overall_f1)
def span_prf1_type_map(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    type_map: Optional[Mapping[str, str]] = None,
) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 with optional label remapping.

    Test and reference documents are paired with ``zip``. Each entity becomes
    a ``(start, end, label)`` triple, where labels found in ``type_map`` are
    replaced by their mapped value. A test triple is a true positive when the
    same triple occurs in the paired reference document and a false positive
    otherwise; a reference triple missing from the test document is a false
    negative.

    Args:
        reference_docs: Documents carrying the gold entities in ``.ents``.
        test_docs: Documents carrying the predicted entities in ``.ents``.
        type_map: Optional mapping from original label to replacement label;
            labels that are not keys of the mapping are kept unchanged.

    Returns:
        A dict from (remapped) label to ``PRF1``, with the score over all
        entities stored under ``""``. The dict is empty when no document
        pair contains any entity.
    """
    label_map = {} if type_map is None else type_map

    def labelled_spans(doc):
        # (start, end, label) for every entity, with the label remapped if listed.
        return [
            (ent.start, ent.end, label_map.get(ent.label_, ent.label_))
            for ent in doc.ents
        ]

    tallies = defaultdict(lambda: defaultdict(int))
    for predicted_doc, gold_doc in zip(test_docs, reference_docs):
        predicted = labelled_spans(predicted_doc)
        gold = labelled_spans(gold_doc)

        for triple in predicted:
            outcome = "tp" if triple in gold else "fp"
            tallies[triple[2]][outcome] += 1
            tallies[""][outcome] += 1

        for triple in gold:
            if triple in predicted:
                continue
            tallies[triple[2]]["fn"] += 1
            tallies[""]["fn"] += 1

    scores = {}
    for label, tally in tallies.items():
        hits, spurious, missed = tally["tp"], tally["fp"], tally["fn"]
        predicted_total = hits + spurious
        gold_total = hits + missed

        # A zero denominator means the score is undefined; report 0.0.
        precision = hits / predicted_total if predicted_total else 0.0
        recall = hits / gold_total if gold_total else 0.0
        combined = precision + recall
        f1 = 2 * (precision * recall) / combined if combined else 0.0

        scores[label] = PRF1(precision, recall, f1)
    return scores
def span_prf1(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    typed: bool = True,
) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 for predicted entities.

    Test and reference documents are paired with ``zip``. Each entity becomes
    a ``(start, end, label)`` triple; when ``typed`` is false the label is
    replaced by ``""`` so only the span boundaries have to agree. A test
    triple is a true positive when the same triple occurs in the paired
    reference document and a false positive otherwise; a reference triple
    missing from the test document is a false negative.

    Args:
        reference_docs: Documents carrying the gold entities in ``.ents``.
        test_docs: Documents carrying the predicted entities in ``.ents``.
        typed: Whether entity labels take part in the comparison.

    Returns:
        With ``typed=True``, a dict from entity label to ``PRF1`` plus the
        score over all entities under ``""``. With ``typed=False``, a dict
        holding only the ``""`` entry. The dict is empty when no document
        pair contains any entity.
    """
    per_label = defaultdict(lambda: defaultdict(int))
    # The aggregate lives in its own counter so that it can never be confused
    # with a real entity label, and so that "no entities at all" is simply an
    # empty counter instead of a missing dictionary key.
    overall = defaultdict(int)

    for test_doc, ref_doc in zip(test_docs, reference_docs):
        test_spans = [
            (ent.start, ent.end, ent.label_ if typed else "")
            for ent in test_doc.ents
        ]
        ref_spans = [
            (ent.start, ent.end, ent.label_ if typed else "")
            for ent in ref_doc.ents
        ]
        # Sets are used for membership only; counting still walks the lists.
        ref_lookup = set(ref_spans)
        test_lookup = set(test_spans)

        for span in test_spans:
            outcome = "tp" if span in ref_lookup else "fp"
            per_label[span[2]][outcome] += 1
            overall[outcome] += 1

        for span in ref_spans:
            if span not in test_lookup:
                per_label[span[2]]["fn"] += 1
                overall["fn"] += 1

    def score(count):
        # Turn tp/fp/fn tallies into a PRF1; undefined ratios become 0.0.
        tp, fp, fn = count["tp"], count["fp"], count["fn"]
        p = tp / (tp + fp) if (tp + fp) != 0 else 0.0
        r = tp / (tp + fn) if (tp + fn) != 0 else 0.0
        f1 = (2 * (p * r) / (p + r)) if (p + r) != 0 else 0.0
        return PRF1(p, r, f1)

    prf1_dict = {label: score(count) for label, count in per_label.items()}
    # Untyped spans all carry the label "", so per_label already holds the
    # aggregate there. In typed mode it has to be added explicitly.
    if typed and overall:
        prf1_dict[""] = score(overall)
    return prf1_dict
def span_prf1_type_map(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    type_map: Optional[Mapping[str, str]] = None,
) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 after remapping entity types.

    The entities of each document are passed through ``remap`` together with
    ``type_map`` and the document's position. Every item it yields is reduced
    to a ``(start, end, label, tag)`` key, where ``tag`` is the item's second
    element (presumably the document index, which keeps identical spans from
    different documents apart -- confirm against ``remap``). Scores are then
    derived from set intersections of reference and test keys.

    Args:
        reference_docs: Documents carrying the gold entities in ``.ents``.
        test_docs: Documents carrying the predicted entities in ``.ents``.
        type_map: Label mapping handed to ``remap``; ``None`` is treated as
            an empty mapping.

    Returns:
        A dict from label to ``PRF1``. The key ``""`` is always present and
        holds the score over all entities regardless of label.

    Raises:
        ValueError: If the two document sequences differ in length.
    """
    if len(reference_docs) != len(test_docs):
        raise ValueError(
            "reference_docs and test_docs must have the same length, "
            "got {} and {}".format(len(reference_docs), len(test_docs))
        )
    if type_map is None:
        type_map = {}

    def span_keys(ents, doc_index):
        # Project the remapped items onto plain hashable tuples once, so the
        # per-label loop below only has to filter.
        return {
            (item[0].start, item[0].end, item[0].label_, item[1])
            for item in remap(ents, type_map, doc_index)
        }

    ref_keys = set()
    test_keys = set()
    for doc_index, (ref_doc, test_doc) in enumerate(zip(reference_docs, test_docs)):
        ref_keys |= span_keys(ref_doc.ents, doc_index)
        test_keys |= span_keys(test_doc.ents, doc_index)

    # "" stands for "all labels" and is reported even when there are no entities.
    labels = {""}
    labels.update(key[2] for key in ref_keys)
    labels.update(key[2] for key in test_keys)

    scores = {}
    for label in labels:
        if label == "":
            gold, predicted = ref_keys, test_keys
        else:
            gold = {key for key in ref_keys if key[2] == label}
            predicted = {key for key in test_keys if key[2] == label}

        hits = len(gold & predicted)
        if hits:
            # hits > 0 guarantees both denominators are non-zero.
            precision = hits / len(predicted)
            recall = hits / len(gold)
            f1 = (2 * precision * recall) / (precision + recall)
        else:
            precision = recall = f1 = 0.0
        scores[label] = PRF1(precision=precision, recall=recall, f1=f1)
    return scores
def span_prf1(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    typed: bool = True,
) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 for predicted entities.

    Reference and test documents are paired by position with ``zip``, so
    surplus documents in the longer sequence are ignored. Entities are
    compared within each pair only, because ``start``/``end`` offsets from
    different documents are not comparable. An entity is reduced to
    ``(start, end, label)``; when ``typed`` is false the label is blanked so
    that only the span boundaries have to agree.

    A predicted span found in the reference document is a true positive,
    any other predicted span is a false positive, and a reference span
    without a matching prediction is a false negative. A span with correct
    boundaries but the wrong label therefore yields one false positive (for
    the predicted label) and one false negative (for the reference label).

    Args:
        reference_docs: Documents carrying the gold entities in ``.ents``.
        test_docs: Documents carrying the predicted entities in ``.ents``.
        typed: Whether entity labels take part in the comparison and whether
            per-label scores are reported.

    Returns:
        A dict that always holds the score over all entities under ``""``.
        With ``typed=True`` it additionally holds one ``PRF1`` per label
        seen in the compared documents, matched by exact label equality.
    """
    def new_tally():
        return {"tp": 0, "fp": 0, "fn": 0}

    overall = new_tally()
    per_label = {}

    def record(span, outcome):
        # Count one outcome for the aggregate and, in typed mode, for the label.
        overall[outcome] += 1
        if typed:
            per_label.setdefault(span[2], new_tally())[outcome] += 1

    def span_keys(doc):
        return {
            (ent.start, ent.end, ent.label_ if typed else "")
            for ent in doc.ents
        }

    for ref_doc, test_doc in zip(reference_docs, test_docs):
        gold = span_keys(ref_doc)
        predicted = span_keys(test_doc)
        for span in predicted:
            record(span, "tp" if span in gold else "fp")
        for span in gold - predicted:
            record(span, "fn")

    def score(tally):
        # Undefined ratios (zero denominator) are reported as 0.0.
        tp, fp, fn = tally["tp"], tally["fp"], tally["fn"]
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        if precision + recall:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0.0
        return PRF1(precision, recall, f1)

    to_return = {label: score(tally) for label, tally in per_label.items()}
    # Written last so the aggregate always owns the "" key.
    to_return[""] = score(overall)
    return to_return
def test_span_prf1_type_map(self):
    """Exercise span_prf1_type_map with no map, an empty map, a renaming map and a collapsing map."""

    def perfect_scores(*labels):
        # The overall "" entry plus every given label, all scored 1.0.
        return {label: PRF1(1.0, 1.0, 1.0) for label in ("",) + labels}

    tokens = [[["George", "Washington"]], [["Maryland"]], [["Asia"]]]

    # The reference scored against itself must be perfect.
    gold_tags = [["B-PER", "I-PER"], ["B-GPE"], ["B-LOC"]]
    gold = _create_docs(tokens, gold_tags, NLP)
    self.assertPRF1DictAlmostEqual(
        perfect_scores("PER", "GPE", "LOC"),
        span_prf1_type_map(gold, gold),
    )

    # An empty type map changes nothing.
    self.assertPRF1DictAlmostEqual(
        perfect_scores("PER", "GPE", "LOC"),
        span_prf1_type_map(gold, gold, type_map={}),
    )

    # Renaming every type keeps the scores and only changes the keys.
    renaming_map = {"GPE": "EPG", "LOC": "COL", "PER": "REP"}
    self.assertPRF1DictAlmostEqual(
        perfect_scores("REP", "EPG", "COL"),
        span_prf1_type_map(gold, gold, type_map=renaming_map),
    )

    # Two errors: the PER span is cut short and the last entity is tagged
    # GPE although the reference says LOC.
    flawed_tags = [["B-PER", "O"], ["B-GPE"], ["B-GPE"]]
    flawed = _create_docs(tokens, flawed_tags, NLP)
    expected_unmapped = {
        "": PRF1(0.3333, 0.3333, 0.3333),
        "PER": PRF1(0.0, 0.0, 0.0),
        "GPE": PRF1(0.5, 1.0, 0.6666),
        "LOC": PRF1(0.0, 0.0, 0.0),
    }
    self.assertPRF1DictAlmostEqual(
        expected_unmapped,
        span_prf1_type_map(gold, flawed),
    )

    # Collapsing GPE and LOC into one type leaves only the PER error.
    collapsing_map = {"GPE": "GPE_LOC", "LOC": "GPE_LOC"}
    expected_collapsed = {
        "": PRF1(0.6666, 0.6666, 0.6666),
        "PER": PRF1(0.0, 0.0, 0.0),
        "GPE_LOC": PRF1(1.0, 1.0, 1.0),
    }
    self.assertPRF1DictAlmostEqual(
        expected_collapsed,
        span_prf1_type_map(gold, flawed, type_map=collapsing_map),
    )