def newConfsMap():
    confs = {}
    for x in ['Exact L', 'Exact UL', 'Soft L', 'Soft UL', 'Bag L', 'Bag UL', 'Manning']:
        confs[x] = {'token': scoring.ConfusionMatrix(0,0,0,0), 'mention': scoring.ConfusionMatrix(0,0,0,0)}
    confs['Manning2'] = {'token': Counter(), 'mention': Counter()}
    softPR = {'UL': {'P': Counter(numer=0, denom=0), 'R': Counter(numer=0, denom=0)},
              'L':  {'P': Counter(numer=0, denom=0), 'R': Counter(numer=0, denom=0)}}
    return confs, softPR
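# Usage sketch (illustrative only, not part of the original script): one
# (confs, softPR) pair is typically kept per label set and filled in by the
# *Confusions() helpers defined below.
#
#   confs, softPR = newConfsMap()
#   confs['Exact L']['token']   # scoring.ConfusionMatrix(0, 0, 0, 0), to be accumulated
#   softPR['UL']['P']           # Counter(numer=0, denom=0), soft precision ratio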
def tokenConfusions(goldseq, predseq, ignoreLabels=False, collapseNonO=False, scheme='BIO', bag=False, ignoreContinuation=False):
    n = nFound = nMissed = nExtra = 0
    if bag:
        gC = Counter()
        pC = Counter()
        for g,p in zip(goldseq,predseq):
            gpm, gl = g
            ppm, pl = p
            if ignoreLabels:
                gl = pl = None
            if ignoreContinuation:
                if collapseNonO:
                    if gpm!='O': gpm = 'B' if isPrimary(gpm,scheme) else gpm
                    if ppm!='O': ppm = 'B' if isPrimary(ppm,scheme) else ppm
                if gpm=='O' or isPrimary(gpm,scheme): gC[(gpm,gl)] += 1
                if ppm=='O' or isPrimary(ppm,scheme): pC[(ppm,pl)] += 1
            else:
                if collapseNonO:
                    if gpm!='O': gpm = primarize(gpm,scheme)
                    if ppm!='O': ppm = primarize(ppm,scheme)
                gC[(gpm,gl)] += 1
                pC[(ppm,pl)] += 1
        n = sum(gC.values())
        assert ignoreContinuation or n==sum(pC.values())
        for tag in set(gC) | set(pC):   # union of tags seen in gold and/or prediction
            if tag[0]=='O': continue
            gn, pn = gC[tag], pC[tag]
            nFound += min(gn,pn)
            if gn>pn:
                nMissed += gn-pn
            elif gn<pn:
                nExtra += pn-gn
    else:
        for g,p in zip(goldseq,predseq):
            gpm, gl = g
            ppm, pl = p
            n += 1
            if (gpm==ppm or (collapseNonO and (gpm=='O')==(ppm=='O'))) and (ignoreLabels or gl==pl):    # correct tag
                if gpm!='O': nFound += 1    # true positive
            elif ppm=='O':
                nMissed += 1    # false negative
            else:
                nExtra += 1     # false positive
    return scoring.ConfusionMatrix(Both=nFound, Aonly=nMissed, Bonly=nExtra, Neither=n-nFound-nMissed-nExtra)
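# Illustrative example (assumed inputs, not from the original file): with the
# default bag=False, each aligned token pair is scored independently, e.g.
#
#   gold = [('B', 'PER'), ('I', 'PER'), ('O', None)]
#   pred = [('B', 'PER'), ('O', None),  ('O', None)]
#   tokenConfusions(gold, pred)
#   # -> ConfusionMatrix with Both=1 (B-PER matched), Aonly=1 (I-PER missed),
#   #    Bonly=0 (nothing spurious), Neither=1 (the shared O token)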
def softMentionConfusions(goldseq, predseq, ignoreLabels=False, matchCriterion=overlap, scheme='BIO'):
    '''
    Any partial overlap between a gold and predicted mention counts as a match
    between the two mentions. Labels are ignored if ignoreLabels is True; otherwise
    a matching label is also required. True positives and true negatives don't
    really make sense here, so we return 0 counts for these and use precision
    vs. recall calculations.

    >>> gold = [('B', 'PER'), ('I', 'PER'), ('I', 'PER'), ('B', 'ORG'), ('O', None), ('O', None), ('B', 'LOC'), ('I', 'LOC')]
    >>> pred = [('B', 'PER'), ('O', None), ('B', 'ORG'), ('I', 'ORG'), ('O', None), ('B', 'YYY'), ('B', 'ORG'), ('B', 'XXX')]
    >>> conf, precRatio, recRatio = softMentionConfusions(gold, pred, ignoreLabels=True)
    >>> assert conf==scoring.ConfusionMatrix(0, 1, 0, 0), conf
    >>> assert precRatio==Counter(numer=4, denom=5)
    >>> assert recRatio==Counter(numer=3, denom=3)
    >>> gold = [('B', 'PER'), ('I', 'PER'), ('I', 'PER'), ('B', 'ORG'), ('O', None), ('O', None), ('B', 'LOC'), ('I', 'LOC')]
    >>> pred = [('B', 'PER'), ('O', None), ('B', 'ORG'), ('I', 'ORG'), ('O', None), ('B', 'YYY'), ('B', 'ORG'), ('B', 'XXX')]
    >>> conf, precRatio, recRatio = softMentionConfusions(gold, pred, ignoreLabels=False)
    >>> assert conf==scoring.ConfusionMatrix(1, 3, 0, 0), conf
    >>> assert precRatio==Counter(numer=2, denom=5)
    >>> assert recRatio==Counter(numer=2, denom=3)
    '''
    x = dict.keys if ignoreLabels else dict.items
    goldMentionSpans = set(x(mentionSpans(goldseq, includeOTokens=False, value='label', scheme=scheme)))
    goldOSpans = set(x(mentionSpans(goldseq, includeOTokens=True, value='label', scheme=scheme))).difference(goldMentionSpans)
    predMentionSpans = set(x(mentionSpans(predseq, includeOTokens=False, value='label', scheme=scheme)))
    predOSpans = set(x(mentionSpans(predseq, includeOTokens=True, value='label', scheme=scheme))).difference(predMentionSpans)

    if ignoreLabels:
        match = lambda g,p: matchCriterion(g,p)
    else:
        match = lambda g,p: matchCriterion(g[0],p[0]) and g[1]==p[1]

    nMatchedPred = nExtra = 0
    uncoveredGold = set(goldMentionSpans)
    for p in predMentionSpans:
        matchedGold = {p} if p in goldMentionSpans else {g for g in goldMentionSpans if match(g,p)}
        if matchedGold:
            nMatchedPred += 1
            uncoveredGold.difference_update(matchedGold)
        else:   # prediction doesn't overlap with any gold mention
            nExtra += 1
    nMatchedGold = len(goldMentionSpans) - len(uncoveredGold)
    return (scoring.ConfusionMatrix(Aonly=len(uncoveredGold), Bonly=nExtra, Both=0, Neither=0),
            Counter(numer=nMatchedPred, denom=len(predMentionSpans)),
            Counter(numer=nMatchedGold, denom=len(goldMentionSpans)))
def mentionConfusions(goldseq, predseq, ignoreLabels=False, scheme='BIO'):
    '''
    >>> gold = [('B', 'PER'), ('I', 'PER'), ('I', 'PER'), ('B', 'ORG'), ('O', None), ('O', None), ('B', 'LOC'), ('B', 'XXX')]
    >>> pred = [('B', 'PER'), ('O', None), ('B', 'ORG'), ('I', 'ORG'), ('O', None), ('B', 'YYY'), ('B', 'ORG'), ('B', 'XXX')]
    >>> conf = mentionConfusions(gold, pred, ignoreLabels=True)
    >>> assert conf==scoring.ConfusionMatrix(2, 3, 2, 1), conf
    >>> gold = [('B', 'PER'), ('I', 'PER'), ('I', 'PER'), ('B', 'ORG'), ('O', None), ('O', None), ('B', 'LOC'), ('B', 'XXX')]
    >>> pred = [('B', 'PER'), ('O', None), ('B', 'ORG'), ('I', 'ORG'), ('O', None), ('B', 'YYY'), ('B', 'ORG'), ('B', 'XXX')]
    >>> conf = mentionConfusions(gold, pred, ignoreLabels=False)
    >>> assert conf==scoring.ConfusionMatrix(3, 4, 1, 1), conf
    '''
    x = dict.keys if ignoreLabels else dict.items
    goldMentionSpans = set(x(mentionSpans(goldseq, includeOTokens=False, value='label', scheme=scheme)))
    goldOSpans = set(x(mentionSpans(goldseq, includeOTokens=True, value='label', scheme=scheme))).difference(goldMentionSpans)
    predMentionSpans = set(x(mentionSpans(predseq, includeOTokens=False, value='label', scheme=scheme)))
    predOSpans = set(x(mentionSpans(predseq, includeOTokens=True, value='label', scheme=scheme))).difference(predMentionSpans)
    return scoring.ConfusionMatrix(len(goldMentionSpans.difference(predMentionSpans)),
                                   len(predMentionSpans.difference(goldMentionSpans)),
                                   len(goldMentionSpans & predMentionSpans),
                                   len(goldOSpans & predOSpans))
if nIgnoredTokens>0:
    print('Ignoring {} tokens in {} sequences'.format(nIgnoredTokens, nIgnoredSeqs), file=sys.stderr)

for lblset,(confs,softPR) in sorted(data.items(), key=lambda itm: itm[0]):
    if lblset==():
        lblsS = 'All {} labels'.format(len(allLabels))
    else:
        if len(allLabels)==1: continue
        lblsS = 'Labels: '+' '.join('(null)' if lbl is None else (repr(lbl) if re.search(r'\s|[\'"]', lbl) else lbl) for lbl in lblset)
        unseen = set(lblset).difference(allLabels)
        if unseen:
            print('Warning: some labels never seen in data:', unseen, file=sys.stderr)

    # convert the raw tp/fp/fn/tn counts accumulated under 'Manning2' into ConfusionMatrix form
    c = confs['Manning2']['token']
    confs['Manning']['token'] = scoring.ConfusionMatrix(Both=c['tp'], Neither=c['tn'], Aonly=c['fn'], Bonly=c['fp'])
    c = confs['Manning2']['mention']
    confs['Manning']['mention'] = scoring.ConfusionMatrix(Both=c['tp'], Neither=c['tn'], Aonly=c['fn'], Bonly=c['fp'])

    nGoldMentions = confs['Exact UL']['mention'].Atotal
    nPredMentions = confs['Exact UL']['mention'].Btotal

    print('''
{}   {:5} {:5}        TOKENS {:<5}                        {:5}   MENTIONS {:<5}
            found xtra miss  O/O                          found xtra miss  O/O
              tp   fp   fn   tn    A%    P%    R%   F1%     tp   fp   fn   tn    P%    R%   F1%'''.format(
        lblsS, scheme, nTokens[lblset], nSeqs[lblset], nGoldMentions, nPredMentions))

    # TODO: Manning score?
    for x in ['Exact L', 'Exact UL', 'Soft L', 'Soft UL', 'Bag L', 'Bag UL', 'Manning', 'Manning2']:
        print('{:8} '.format(x if x!='Manning2' else ''), end='')