def color_render(*args, **kwargs):
    """Render a sentence with ``render`` and decorate it with terminal colors.

    All positional and keyword arguments are forwarded unchanged to
    ``render``.  In the rendered string:

    * the whole text is wrapped in ``Colors.YELLOW`` ... ``Colors.PLAINTEXT``;
    * every ``_`` is emitted in ``Colors.PLAINTEXT``;
    * every ``|v.<label>`` is emitted in ``Colors.RED``;
    * every ``|n.<label>`` is emitted in ``Colors.BLUE``.

    Returns the decorated string.
    """
    # terminal colors
    WORDS = Colors.YELLOW
    VERBS = Colors.RED
    NOUNS = Colors.BLUE
    MWE = Colors.PLAINTEXT

    s = render(*args, **kwargs)
    c = WORDS + s.replace('_', MWE + '_' + WORDS) + Colors.PLAINTEXT

    # The dot is escaped so that only a literal "|v." / "|n." prefix is
    # colored (an unescaped "." would match any character).  A callable
    # replacement is used so the color codes are inserted verbatim and are
    # never interpreted as a regex replacement template.
    c = re.sub(r'(\|v\.\w+)', lambda m: VERBS + m.group(1) + WORDS, c)  # verb supersenses
    c = re.sub(r'(\|n\.\w+)', lambda m: NOUNS + m.group(1) + WORDS, c)  # noun supersenses
    return c
def color_render(*args, **kwargs):
    """Render a sentence with ``render`` and decorate it with terminal colors.

    All positional and keyword arguments are forwarded unchanged to
    ``render``.  In the rendered string:

    * the whole text is wrapped in ``Colors.YELLOW`` ... ``Colors.PLAINTEXT``;
    * every ``_`` is emitted in ``Colors.PLAINTEXT``;
    * every ``|v.<label>`` is emitted in ``Colors.RED``;
    * every ``|n.<label>`` is emitted in ``Colors.BLUE``.

    Returns the decorated string.
    """
    # terminal colors
    WORDS = Colors.YELLOW
    VERBS = Colors.RED
    NOUNS = Colors.BLUE
    MWE = Colors.PLAINTEXT

    s = render(*args, **kwargs)
    c = WORDS + s.replace('_', MWE + '_' + WORDS) + Colors.PLAINTEXT

    # The dot is escaped so that only a literal "|v." / "|n." prefix is
    # colored (an unescaped "." would match any character).  A callable
    # replacement is used so the color codes are inserted verbatim and are
    # never interpreted as a regex replacement template.
    c = re.sub(r'(\|v\.\w+)', lambda m: VERBS + m.group(1) + WORDS, c)  # verb supersenses
    c = re.sub(r'(\|n\.\w+)', lambda m: NOUNS + m.group(1) + WORDS, c)  # noun supersenses
    return c
assert all(len(t)<=1 for t in ptags_mwe) words, poses = zip(*gdata["words"]) assert len(words)==len(gtags_mwe)==len(ptags_mwe) nToks += len(words) stats['nFullTagCorrect'] += sum(1 for k in range(len(words)) if gtags_mwe[k]==ptags_mwe[k] and glbls.get(k)==plbls.get(k)) if printSents: if predFs[0] is predF: print(color_render(words, gdata["_"], [], {k+1: v for k,v in glbls.items()}), file=sys.stderr) print(color_render(words, pdata["_"], [], {k+1: v for k,v in plbls.items()}), file=sys.stderr) try: mweval_sent(zip(words,gtags_mwe,ptags_mwe), gdata["_"], pdata["_"], gmwetypes, pmwetypes, stats, indata=(gdata,pdata)) ssteval_sent(words, glbls, plbls, sststats, conf) except AssertionError as ex: print(render(words, gdata["_"], [])) print(render(words, pdata["_"], [])) raise ex # loaded all files and sentences. gmwetypes = gmwetypesCs[0] sysprefixes = [('SYS{:0'+str(len(str(len(predFs))))+'} ').format(i+1) if len(predFs)>1 else '' for i in range(len(predFs))] syspad = ' '*len(sysprefixes[0]) # MWE stats print(syspad+' P | R | F | EP | ER | EF | Acc | O | non-O | ingap | B vs I') for stats,conf,pmwetypes,sysprefix in zip(statsCs,confCs,pmwetypesCs,sysprefixes): fullAcc = Ratio(stats['nFullTagCorrect'], nToks) nTags = stats['correct']+stats['incorrect']
def identify(model, args):
    """Mark target tokens (single words and multiword expressions) in a file.

    Every sentence yielded by ``sentences(args.file)`` is scanned token by
    token.  Each token's ``checkmark`` ends up as one of:

    * ``"<n>:1**"`` -- first word of the n-th multiword expression found in
      the sentence; later words of that expression get ``"<n>:<k>"``;
    * ``"*"`` -- single-word target selected by the ``heuristic*`` helpers;
    * ``"-"`` -- anything else.

    Args:
        model: mapping; the optional keys ``"p_mwe"`` and ``"non_p_mwe"``
            supply the MWE list / anti-list when no list file is given.
        args: option namespace.  Attributes read here: ``file``, ``mwe``,
            ``eval``, ``mwe_list``, ``mwe_anti_list``, ``tp``, ``fp``,
            ``fn``, ``tn``, ``sst``, ``lexcat`` and ``context``.

    Output (stdout unless noted):
        * default mode: the sentence meta lines, then one
          ``orig<TAB>checkmark[<TAB>lexcat]`` line per token, then a blank
          line;
        * ``args.sst``: one ``id<TAB>rendering<TAB>json`` line per sentence;
        * ``args.tp`` / ``fp`` / ``fn`` / ``tn`` (without ``args.eval``):
          the matching tokens are reported through ``print_target``;
        * ``args.eval``: a 2x2 contingency table followed by P, R and F.

    The process exits with status 1 if gold columns are required but a
    token has fewer than 14 fields.
    """
    infile = args.file
    mwe = args.mwe
    evl = args.eval
    # Truthy whenever the gold columns are consulted.  The raw value (not a
    # bool) is kept because it is also forwarded to ``sentences`` below.
    needs_gold = evl or args.tp or args.fp or args.fn or args.tn

    mwe_list = set()
    if args.mwe_list:
        with open(args.mwe_list) as f:
            for line in f:
                mwe_list.add(line.strip().split("\t")[0].strip())
    elif "p_mwe" in model:
        mwe_list = set(model["p_mwe"]).union(set(PREPS_MASTER))
    else:
        mwe_list = PREPS_MASTER

    non_prep_mwe_list = set()
    if args.mwe_anti_list:
        with open(args.mwe_anti_list) as f:
            for line in f:
                non_prep_mwe_list.add(line.strip().split("\t")[0].strip())
    elif "non_p_mwe" in model:
        non_prep_mwe_list = model["non_p_mwe"]

    tp, fp, fn, tn = 0, 0, 0, 0

    # NOTE(review): these counts are never read again in this function; the
    # pass is kept only in case ``sentences`` has side effects -- confirm and
    # drop it if it does not.
    lemma_pos_counts = {}
    for sent in sentences(infile):
        for token in sent.tokens:
            if token.lemma not in lemma_pos_counts:
                lemma_pos_counts[token.lemma] = defaultdict(int)
            lemma_pos_counts[token.lemma][token.ud_pos] += 1
            lemma_pos_counts[token.lemma][token.ptb_pos] += 1

    # default=0 keeps an empty MWE list from raising ValueError.
    max_mwe_length = max((len(w.split()) for w in mwe_list), default=0)
    print("max MWE length={}".format(max_mwe_length), file=sys.stderr)

    mw_beginners = set([
        w.split()[0] for w in list(mwe_list) + list(non_prep_mwe_list)
        if len(w.split()) >= 2
    ]).union(set(PREP_SPECIAL_MW_BEGINNERS))

    for si, sent in enumerate(sentences(infile, conllulex=needs_gold),
                              start=1):
        if not (args.sst or needs_gold):
            for metaline in sent.meta:
                print(metaline)

        mwes = []
        mwe_counter = 1
        length = len(sent.tokens)
        i = 0
        k = 0  # tokens before index k are already covered by an (anti-)MWE
        while i < length:
            token = sent.tokens[i]
            lexcat = ""
            if needs_gold:
                try:
                    supersense = token.fields[13]
                except IndexError:
                    print(
                        "NOTE: the --eval, --tp, --fp, --fn, --tn options works ONLY with full .conllulex format",
                        file=sys.stderr)
                    sys.exit(1)

            # True when the gold supersense starts with "p".
            gold_target = False
            if needs_gold and re.match("^p", supersense):
                gold_target = True

            lemma = token.lemma
            skip = False
            if i >= k and (not needs_gold or supersense != "??"):
                if mwe and token.lemma in mw_beginners:
                    # NOTE(review): the "- 1" means an n-gram that ends on the
                    # last token, or that has exactly max_mwe_length words,
                    # is never tried -- possible off-by-one, left unchanged.
                    for j in range(
                            min(length, i + max_mwe_length) - 1, i + 1, -1):
                        ngram = list(sent.tokens[i:j])
                        ngram_lemma = " ".join(
                            [member.lemma for member in ngram])
                        if ngram_lemma in non_prep_mwe_list:
                            skip = True
                            k = j
                            break
                        if ngram_lemma in mwe_list:
                            # find the longest possible mwe
                            mwes.append(
                                [int(member.offset) for member in ngram])
                            token.checkmark = "{}:{}".format(
                                mwe_counter, 1) + "**"
                            lemma = ngram_lemma
                            for current_mwe_counter, tok in enumerate(
                                    ngram[1:], start=2):
                                sent.tokens[int(tok.offset) -
                                            1].checkmark = "{}:{}".format(
                                                mwe_counter,
                                                current_mwe_counter)
                            if ngram[-1].ud_pos in ("ADP", "SCONJ"):
                                lexcat = "P"
                            else:
                                lexcat = "PP"
                            mwe_counter += 1
                            k = j
                            break
                if not token.checkmark and not skip:
                    token.checkmark += (heuristicADP(token)
                                        + heuristicPossessive(token, sent)
                                        + heuristicSCONJ(token, model)
                                        + heuristicADV(token)
                                        + heuristicTO(token, sent, model)
                                        + heuristicForXTo(token, sent))

            first_in_mwe = False
            if token.checkmark.endswith("*"):
                if token.checkmark.endswith("**"):
                    first_in_mwe = True
                else:
                    token.checkmark = "*"
                    lexcat = {
                        'PRP$': 'PRON.POSS',
                        'WP$': 'PRON.POSS',
                        'POS': 'POSS',
                        'TO': 'INF.P'
                    }.get(token.ptb_pos, "P")
            elif not (token.checkmark and token.checkmark[0].isdigit()):
                token.checkmark = "-"

            if not args.lexcat:
                lexcat = ""

            if token.checkmark == "*" or first_in_mwe:
                if gold_target:
                    # exact match
                    if token.lexlemma == lemma:
                        if args.tp and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        tp += 1
                    else:
                        # Predicted unit differs from the gold one: counted
                        # as both a false positive and a false negative.
                        if args.fp and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        fp += 1
                        if args.fn and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        fn += 1
                else:
                    if args.fp and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    fp += 1
            else:
                if gold_target:
                    if args.fn and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    fn += 1
                else:
                    if args.tn and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    tn += 1

            if not (args.sst or needs_gold):
                print("{}\t{}".format(token.orig, token.checkmark) +
                      ("\t{}".format(lexcat) if lexcat else ""))
            i += 1

        if args.sst:
            _json = {
                "words": [],
                "lemmas": [],
                "tags": [],
                "labels": {},
                "_": mwes,
                "~": [],
            }
            _sent = []
            for tok in sent.tokens:
                _sent.append(tok.word)
                _json["words"].append([tok.word, tok.ptb_pos])
                _json["lemmas"].append(tok.lemma)
                if tok.checkmark.endswith("*"):
                    _json["labels"][tok.offset] = [tok.word, "Locus"]
            print("{}\t{}\t{}".format(
                sent.meta_dict.get(
                    "sent_id",
                    args.file.split("/")[-1].rsplit(".", maxsplit=1)[0] +
                    "." + str(si)),
                tags2sst.render(_sent, _json["_"], []).decode("utf-8"),
                json.dumps(_json)))
        elif not needs_gold:
            print()

    if evl:
        print("\tgold+\tgold-")
        print("auto+\t{}\t{}\t{}".format(tp, fp, tp + fp))
        print("auto-\t{}\t{}\t{}".format(fn, tn, fn + tn))
        print("\t{}\t{}\t{}".format(tp + fn, fp + tn, tp + fp + tn + fn))
        # Guard the ratios: with no predicted or no gold targets the plain
        # divisions would raise ZeroDivisionError.
        p = tp / (tp + fp) if tp + fp else 0.0
        r = tp / (tp + fn) if tp + fn else 0.0
        f = (2 * p * r) / (p + r) if p + r else 0.0
        print("\nP\tR\tF")
        print("{}\t{}\t{}".format(p, r, f))
def evaluate(args):
    """Score predicted MWE/supersense analyses against gold and print reports.

    ``args`` is a list of strings: optional leading flags, then the gold
    file path, then one or more prediction file paths.

    Flags:
        ``-p``  also print every gold/predicted sentence to stderr.
        ``-C``  disable colors by blanking every public attribute of
                ``Colors`` and ``Styles``.

    Gold and prediction files are read in lockstep with ``readsents``; each
    sentence pair is scored with ``mweval_sent`` and ``ssteval_sent``.  The
    function then prints the MWE table, (for a single prediction file) the
    raw statistics and the supersense confusion matrices, the supersense
    table, the combined table and (single prediction file) a summary.

    Returns:
        ``np.array([MWE F, supersense F, combined F])`` for the last
        prediction file.

    Raises:
        AssertionError: on an unknown flag or when a sentence fails the
            consistency checks (the sentence pair is printed first).

    NOTE(review): the ``.encode('utf-8')`` calls produce ``bytes`` under
    Python 3 while later code tests labels against ``str`` prefixes --
    confirm which interpreter this module targets.
    """
    SPECTRUM = [Colors.BLUE, Colors.CYAN, Colors.GREEN, Colors.YELLOW,
                Colors.ORANGE, Colors.RED, Colors.PINK]

    printSents = False
    while args and args[0].startswith('-'):
        if args[0] == '-p':  # print sentences to stderr
            printSents = True
        elif args[0] == '-C':  # turn off colors
            for c in dir(Colors):
                if not c.startswith('_'):
                    setattr(Colors, c, '')
            for s in dir(Styles):
                if not s.startswith('_'):
                    setattr(Styles, s, '')
            SPECTRUM = ['']
        else:
            assert False, 'Unexpected option: ' + args[0]
        args = args[1:]

    # set up color defaults
    print(Colors.BACKGROUND + Colors.PLAINTEXT, end='')

    nToks = 0
    goldLblsC = Counter()
    goldFP = args[0]
    print("goldFP: ", goldFP)
    print("predFP: ", args[1])
    predFs = [readsents(fileinput.input(predFP)) for predFP in args[1:]]
    statsCs = [Counter() for predFP in args[1:]]
    sststatsCs = [defaultdict(Counter) for predF in args[1:]]
    gmwetypesCs = [Counter() for predFP in args[1:]]  # these will all have the same contents
    pmwetypesCs = [Counter() for predFP in args[1:]]
    confCs = [Counter() for predFP in args[1:]]  # confusion matrix

    for sentId, gdata in readsents(fileinput.input(goldFP)):
        gtags_mwe = [t.encode('utf-8') for t in gdata["tags"]]
        #print ("t: ",t, gtags_mwe)
        assert all(len(t) <= 1 for t in gtags_mwe)
        glbls = {k - 1: v[1].encode('utf-8')
                 for k, v in gdata["labels"].items()}
        goldLblsC.update(glbls.values())
        words, poses = zip(*gdata["words"])
        # Count every gold token exactly once; counting inside the
        # per-system loop would inflate the accuracy denominator by the
        # number of prediction files.
        nToks += len(words)

        for predF, stats, gmwetypes, pmwetypes, sststats, conf in zip(
                predFs, statsCs, gmwetypesCs, pmwetypesCs, sststatsCs,
                confCs):
            sentId, pdata = next(predF)
            ptags_mwe = [t.encode('utf-8') for t in pdata["tags"]]
            plbls = {k - 1: v[1].encode('utf-8')
                     for k, v in pdata["labels"].items()}
            assert all(len(t) <= 1 for t in ptags_mwe)
            assert len(words) == len(gtags_mwe) == len(ptags_mwe)
            stats['nFullTagCorrect'] += sum(
                1 for k in range(len(words))
                if gtags_mwe[k] == ptags_mwe[k]
                and glbls.get(k) == plbls.get(k))
            if printSents:
                if predFs[0] is predF:
                    print(color_render(words, gdata["_"], [],
                                       {k + 1: v for k, v in glbls.items()}),
                          file=sys.stderr)
                print(color_render(words, pdata["_"], [],
                                   {k + 1: v for k, v in plbls.items()}),
                      file=sys.stderr)
            try:
                mweval_sent(zip(words, gtags_mwe, ptags_mwe), gdata["_"],
                            pdata["_"], gmwetypes, pmwetypes, stats,
                            indata=(gdata, pdata))
                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError:
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise  # bare raise keeps the original traceback

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]
    sysprefixes = [('SYS{:0' + str(len(str(len(predFs)))) + '} ').format(i + 1)
                   if len(predFs) > 1 else ''
                   for i in range(len(predFs))]
    syspad = ' ' * len(sysprefixes[0])

    # MWE stats
    print(syspad + ' P | R | F | EP | ER | EF | Acc | O | non-O | ingap | B vs I')
    for stats, conf, pmwetypes, sysprefix in zip(statsCs, confCs,
                                                 pmwetypesCs, sysprefixes):
        nTags = stats['correct'] + stats['incorrect']
        stats['Acc'] = Ratio(stats['correct'], nTags)
        stats['Tag_R_Oo'] = Ratio(stats['gold_pred_Oo'], stats['gold_Oo'])
        stats['Tag_R_non-Oo'] = Ratio(stats['gold_pred_non-Oo'],
                                      stats['gold_non-Oo'])
        stats['Tag_Acc_non-Oo_in-gap'] = Ratio(
            stats['gold_pred_non-Oo_in-or-out-of-gap_match'],
            stats['gold_pred_non-Oo'])
        stats['Tag_Acc_non-Oo_B-v-I'] = Ratio(
            stats['gold_pred_non-Oo_Bb-v-Ii_match'],
            stats['gold_pred_non-Oo'])
        stats['Tag_Acc_I_strength'] = Ratio(
            stats['gold_pred_Ii_strength_match'], stats['gold_pred_Ii'])
        stats['P'] = Ratio(stats['PNumer'], stats['PDenom'])
        stats['R'] = Ratio(stats['RNumer'], stats['RDenom'])
        stats['F'] = f1(stats['P'], stats['R'])
        stats['CrossGapP'] = (stats['CrossGapPNumer'] / stats['CrossGapPDenom']
                              if stats['CrossGapPDenom'] > 0 else float('nan'))
        stats['CrossGapR'] = (stats['CrossGapRNumer'] / stats['CrossGapRDenom']
                              if stats['CrossGapRDenom'] > 0 else float('nan'))
        stats['EP'] = Ratio(stats['ENumer'], stats['EPDenom'])
        stats['ER'] = Ratio(stats['ENumer'], stats['ERDenom'])
        stats['EF'] = f1(stats['EP'], stats['ER'])

        if gmwetypes:
            assert stats['Gold_#Groups'] == sum(gmwetypes.values())
            stats['Gold_#Types'] = len(gmwetypes)
        assert stats['Pred_#Groups'] == sum(pmwetypes.values())
        stats['Pred_#Types'] = len(pmwetypes)

        if len(predFs) == 1:
            print('mwestats = ', dict(stats), ';', sep='')
            print()
            print('sststats = ', dict(sststats), ';', sep='')
            print()
            print('conf = ', dict(conf), ';', sep='')
            print()

        parts = [(' {1}{0:.2%}'.format(float(stats[x]),
                                       relativeColor(stats[x], statsCs[0][x]))
                  + Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F')
                                 or isinstance(stats[x], (float, int))
                                 else stats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F')
                                 or isinstance(stats[x], (float, int))
                                 else stats[x].denominatorS))
                 for x in ('P', 'R', 'F', 'EP', 'ER', 'EF', 'Acc',
                           'Tag_R_Oo', 'Tag_R_non-Oo',
                           'Tag_Acc_non-Oo_in-gap', 'Tag_Acc_non-Oo_B-v-I')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()
    #print(pmwetypes)

    # Supersense stats
    if len(predFs) == 1:
        # supersense confusion matrices
        colrs = {'n.': Colors.RED, 'v.': Colors.BLUE}
        fmts = {'n.': str.upper, 'v.': str.lower}
        for d, d2 in (('n.', 'v.'), ('v.', 'n.')):
            matrix = [['{: >15}'.format('----')
                       + ' {:5}'.format(goldLblsC[None] or '')]]
            header = [' {}GOLD{} '.format(Styles.UNDERLINE, Styles.NORMAL),
                      ' ----']
            lbls = [None]
            for lbl, n in goldLblsC.most_common():
                if lbl.startswith(d):
                    lbls.append(lbl)
                    matrix.append([colrs[d] + '{: >15}'.format(lbl)
                                   + Colors.PLAINTEXT + ' {:5}'.format(n)])
                    header.append(' ' + colrs[d] + fmts[d](lbl[2:])[:4]
                                  + Colors.PLAINTEXT)

            # cross-POS confusions
            gconfsC = Counter([p for (g, p), n in conf.most_common()
                               if g and p and g.startswith(d)
                               for i in range(n)])
            # Index access instead of a tuple-unpacking lambda: that syntax
            # was removed in Python 3 (PEP 3113) and is a SyntaxError there.
            for lbl, n in sorted(gconfsC.most_common(),
                                 key=lambda item: not item[0].startswith(d)):
                if lbl not in lbls:
                    lbls.append(lbl)
                    #matrix.append([colrs[d2]+'{: >15}'.format(lbl)+Colors.PLAINTEXT+' {:5}'.format(n)])
                    header.append(' ' + colrs[lbl[:2]]
                                  + fmts[lbl[:2]](lbl[2:])[:4]
                                  + Colors.PLAINTEXT)
                    # since this label is for the other part of speech, show as a column (predicted) but not a row (gold)
            header.append(' <-- PRED')

            # matrix content
            if not conf:
                print(Colors.RED + 'No gold or predicted supersenses found: check that the input is in the right format. Exiting.' + Colors.RED + Colors.ENDC)
                sys.exit(1)

            offdiag = [n for (g, p), n in conf.most_common()
                       if (g is None or g.startswith(d)) and g != p]
            # With no off-diagonal confusion for this POS the value is never
            # used below (no cell has v>0 and i!=j), so any non-zero
            # placeholder avoids the IndexError of indexing an empty list.
            nondiag_max = offdiag[0] if offdiag else 1

            for i, g in enumerate(lbls):
                if i >= len(matrix):
                    continue
                for j, p in enumerate(lbls):
                    while len(matrix[i]) <= j + 1:
                        matrix[i].append('')
                    v = conf[g, p]
                    #if v>0 or i==j:
                    #    print(v, g,p, int((v-1)/nondiag_max*len(SPECTRUM)), nondiag_max)
                    colr = (SPECTRUM[int((v - 1) / nondiag_max * len(SPECTRUM))]
                            if v > 0 and i != j else Colors.PLAINTEXT)
                    matrix[i][j + 1] = (colr + ' {:4}'.format(conf[g, p] or '')
                                        + Colors.PLAINTEXT)

            print(''.join(header))
            for ln in matrix:
                print(''.join(ln))
            print()

    # supersense scores
    print(syspad + ' Acc | P | R | F || R: NSST | VSST ')
    for sststats, sysprefix in zip(sststatsCs, sysprefixes):
        parts = [(' {1}{0:.2%}'.format(
                      float(sststats['Exact Tag']['Acc']),
                      relativeColor(sststats['Exact Tag']['Acc'],
                                    sststatsCs[0]['Exact Tag']['Acc']))
                  + Colors.PLAINTEXT,
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].numeratorS),
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].denominatorS))]
        parts += [(' {1}{0:.2%}'.format(
                       float(sststats[None][x]),
                       relativeColor(sststats[None][x],
                                     sststatsCs[0][None][x]))
                   + Colors.PLAINTEXT,
                   '{:>7}'.format(sststats[None][x].numeratorS),
                   '{:>7}'.format(sststats[None][x].denominatorS))
                  for x in ('P', 'R')]
        parts += [(' {1}{0:.2%} '.format(
                       float(sststats[None]['F']),
                       relativeColor(sststats[None]['F'],
                                     sststatsCs[0][None]['F']))
                   + Colors.PLAINTEXT,
                   ' ',
                   ' ')]
        parts += [(' {1}{0:.2%}'.format(
                       float(sststats[y]['R']),
                       relativeColor(sststats[y]['R'],
                                     sststatsCs[0][y]['R']))
                   + Colors.PLAINTEXT,
                   '{:>7}'.format(sststats[y]['R'].numeratorS),
                   '{:>7}'.format(sststats[y]['R'].denominatorS))
                  for y in ('n', 'v')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()

    # combined acc, P, R, F
    print(syspad + ' Acc | P | R | F ')
    cstatsBL = None
    # Iterate the MWE stats together with the supersense stats so each row
    # combines the numbers of the same system (previously every row reused
    # the MWE stats of the last system).
    for stats, sststats, sysprefix in zip(statsCs, sststatsCs, sysprefixes):
        cstats = Counter()
        cstats['Acc'] = Ratio(stats['nFullTagCorrect'], nToks)
        cstats['P'] = Ratio(
            stats['P'].numerator + sststats[None]['P'].numerator,
            stats['P'].denominator + sststats[None]['P'].denominator)
        cstats['R'] = Ratio(
            stats['R'].numerator + sststats[None]['R'].numerator,
            stats['R'].denominator + sststats[None]['R'].denominator)
        cstats['F'] = f1(cstats['P'], cstats['R'])
        if cstatsBL is None:
            cstatsBL = cstats
        parts = [(' {1}{0:.2%}'.format(float(cstats[x]),
                                       relativeColor(cstats[x], cstatsBL[x]))
                  + Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F')
                                 or isinstance(cstats[x], (float, int))
                                 else cstats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F')
                                 or isinstance(cstats[x], (float, int))
                                 else cstats[x].denominatorS))
                 for x in ('Acc', 'P', 'R', 'F')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))

    if len(predFs) == 1:
        print()
        print('SUMMARY SCORES')
        print('==============')
        print(re.sub(r'=([^=]+)$',
                     '=' + Colors.YELLOW + r'\1' + Colors.PLAINTEXT,
                     'MWEs: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                         stats=stats, f=float(stats['F']))))
        print(re.sub(r'=([^=]+)$',
                     '=' + Colors.PINK + r'\1' + Colors.PLAINTEXT,
                     'Supersenses: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                         stats=sststats[None],
                         f=float(sststats[None]['F']))))
        print(re.sub(r'=([^=]+)$',
                     '=' + Colors.GREEN + r'\1' + Colors.PLAINTEXT,
                     'Combined: Acc={stats[Acc]} P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                         stats=cstats, f=float(cstats['F']))))

    # restore the terminal's default colors
    print(Colors.ENDC, end='')

    return np.array([float(stats['F']),
                     float(sststats[None]['F']),
                     float(cstats['F'])])
def evaluate(args):
    """Score predicted MWE/supersense analyses against gold and print reports.

    ``args`` is a list of strings: optional leading flags, then the gold
    file path, then one or more prediction file paths.

    Flags:
        ``-p``  also print every gold/predicted sentence to stderr.
        ``-C``  disable colors by blanking every public attribute of
                ``Colors`` and ``Styles``.

    Gold and prediction files are read in lockstep with ``readsents``; each
    sentence pair is scored with ``mweval_sent`` and ``ssteval_sent``.  The
    function then prints the MWE table, (for a single prediction file) the
    raw statistics and the supersense confusion matrices, the supersense
    table, the combined table and (single prediction file) a summary.

    Returns:
        ``np.array([MWE F, supersense F, combined F])`` for the last
        prediction file.

    Raises:
        AssertionError: on an unknown flag or when a sentence fails the
            consistency checks (the sentence pair is printed first).

    NOTE(review): the ``.encode('utf-8')`` calls produce ``bytes`` under
    Python 3 while later code tests labels against ``str`` prefixes --
    confirm which interpreter this module targets.
    """
    SPECTRUM = [
        Colors.BLUE, Colors.CYAN, Colors.GREEN, Colors.YELLOW, Colors.ORANGE,
        Colors.RED, Colors.PINK
    ]

    printSents = False
    while args and args[0].startswith('-'):
        if args[0] == '-p':  # print sentences to stderr
            printSents = True
        elif args[0] == '-C':  # turn off colors
            for c in dir(Colors):
                if not c.startswith('_'):
                    setattr(Colors, c, '')
            for s in dir(Styles):
                if not s.startswith('_'):
                    setattr(Styles, s, '')
            SPECTRUM = ['']
        else:
            assert False, 'Unexpected option: ' + args[0]
        args = args[1:]

    # set up color defaults
    print(Colors.BACKGROUND + Colors.PLAINTEXT, end='')

    nToks = 0
    goldLblsC = Counter()
    goldFP = args[0]
    print("goldFP: ", goldFP)
    print("predFP: ", args[1])
    predFs = [readsents(fileinput.input(predFP)) for predFP in args[1:]]
    statsCs = [Counter() for predFP in args[1:]]
    sststatsCs = [defaultdict(Counter) for predF in args[1:]]
    gmwetypesCs = [Counter() for predFP in args[1:]
                   ]  # these will all have the same contents
    pmwetypesCs = [Counter() for predFP in args[1:]]
    confCs = [Counter() for predFP in args[1:]]  # confusion matrix

    for sentId, gdata in readsents(fileinput.input(goldFP)):
        gtags_mwe = [t.encode('utf-8') for t in gdata["tags"]]
        #print ("t: ",t, gtags_mwe)
        assert all(len(t) <= 1 for t in gtags_mwe)
        glbls = {
            k - 1: v[1].encode('utf-8')
            for k, v in gdata["labels"].items()
        }
        goldLblsC.update(glbls.values())
        words, poses = zip(*gdata["words"])
        # Count every gold token exactly once; counting inside the
        # per-system loop would inflate the accuracy denominator by the
        # number of prediction files.
        nToks += len(words)

        for predF, stats, gmwetypes, pmwetypes, sststats, conf in zip(
                predFs, statsCs, gmwetypesCs, pmwetypesCs, sststatsCs,
                confCs):
            sentId, pdata = next(predF)
            ptags_mwe = [t.encode('utf-8') for t in pdata["tags"]]
            plbls = {
                k - 1: v[1].encode('utf-8')
                for k, v in pdata["labels"].items()
            }
            assert all(len(t) <= 1 for t in ptags_mwe)
            assert len(words) == len(gtags_mwe) == len(ptags_mwe)
            stats['nFullTagCorrect'] += sum(
                1 for k in range(len(words))
                if gtags_mwe[k] == ptags_mwe[k]
                and glbls.get(k) == plbls.get(k))
            if printSents:
                if predFs[0] is predF:
                    print(color_render(words, gdata["_"], [],
                                       {k + 1: v
                                        for k, v in glbls.items()}),
                          file=sys.stderr)
                print(color_render(words, pdata["_"], [],
                                   {k + 1: v
                                    for k, v in plbls.items()}),
                      file=sys.stderr)
            try:
                mweval_sent(zip(words, gtags_mwe, ptags_mwe),
                            gdata["_"],
                            pdata["_"],
                            gmwetypes,
                            pmwetypes,
                            stats,
                            indata=(gdata, pdata))
                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError:
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise  # bare raise keeps the original traceback

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]
    sysprefixes = [('SYS{:0' + str(len(str(len(predFs)))) +
                    '} ').format(i + 1) if len(predFs) > 1 else ''
                   for i in range(len(predFs))]
    syspad = ' ' * len(sysprefixes[0])

    # MWE stats
    print(
        syspad +
        ' P | R | F | EP | ER | EF | Acc | O | non-O | ingap | B vs I'
    )
    for stats, conf, pmwetypes, sysprefix in zip(statsCs, confCs,
                                                 pmwetypesCs, sysprefixes):
        nTags = stats['correct'] + stats['incorrect']
        stats['Acc'] = Ratio(stats['correct'], nTags)
        stats['Tag_R_Oo'] = Ratio(stats['gold_pred_Oo'], stats['gold_Oo'])
        stats['Tag_R_non-Oo'] = Ratio(stats['gold_pred_non-Oo'],
                                      stats['gold_non-Oo'])
        stats['Tag_Acc_non-Oo_in-gap'] = Ratio(
            stats['gold_pred_non-Oo_in-or-out-of-gap_match'],
            stats['gold_pred_non-Oo'])
        stats['Tag_Acc_non-Oo_B-v-I'] = Ratio(
            stats['gold_pred_non-Oo_Bb-v-Ii_match'],
            stats['gold_pred_non-Oo'])
        stats['Tag_Acc_I_strength'] = Ratio(
            stats['gold_pred_Ii_strength_match'], stats['gold_pred_Ii'])
        stats['P'] = Ratio(stats['PNumer'], stats['PDenom'])
        stats['R'] = Ratio(stats['RNumer'], stats['RDenom'])
        stats['F'] = f1(stats['P'], stats['R'])
        stats['CrossGapP'] = (
            stats['CrossGapPNumer'] / stats['CrossGapPDenom']
            if stats['CrossGapPDenom'] > 0 else float('nan'))
        stats['CrossGapR'] = (
            stats['CrossGapRNumer'] / stats['CrossGapRDenom']
            if stats['CrossGapRDenom'] > 0 else float('nan'))
        stats['EP'] = Ratio(stats['ENumer'], stats['EPDenom'])
        stats['ER'] = Ratio(stats['ENumer'], stats['ERDenom'])
        stats['EF'] = f1(stats['EP'], stats['ER'])

        if gmwetypes:
            assert stats['Gold_#Groups'] == sum(gmwetypes.values())
            stats['Gold_#Types'] = len(gmwetypes)
        assert stats['Pred_#Groups'] == sum(pmwetypes.values())
        stats['Pred_#Types'] = len(pmwetypes)

        if len(predFs) == 1:
            print('mwestats = ', dict(stats), ';', sep='')
            print()
            print('sststats = ', dict(sststats), ';', sep='')
            print()
            print('conf = ', dict(conf), ';', sep='')
            print()

        parts = [(' {1}{0:.2%}'.format(
            float(stats[x]), relativeColor(stats[x], statsCs[0][x])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      stats[x], (float, int)) else stats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      stats[x], (float, int)) else stats[x].denominatorS))
                 for x in ('P', 'R', 'F', 'EP', 'ER', 'EF', 'Acc',
                           'Tag_R_Oo', 'Tag_R_non-Oo',
                           'Tag_Acc_non-Oo_in-gap', 'Tag_Acc_non-Oo_B-v-I')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()
    #print(pmwetypes)

    # Supersense stats
    if len(predFs) == 1:
        # supersense confusion matrices
        colrs = {'n.': Colors.RED, 'v.': Colors.BLUE}
        fmts = {'n.': str.upper, 'v.': str.lower}
        for d, d2 in (('n.', 'v.'), ('v.', 'n.')):
            matrix = [[
                '{: >15}'.format('----') +
                ' {:5}'.format(goldLblsC[None] or '')
            ]]
            header = [
                ' {}GOLD{} '.format(Styles.UNDERLINE, Styles.NORMAL), ' ----'
            ]
            lbls = [None]
            for lbl, n in goldLblsC.most_common():
                if lbl.startswith(d):
                    lbls.append(lbl)
                    matrix.append([
                        colrs[d] + '{: >15}'.format(lbl) + Colors.PLAINTEXT +
                        ' {:5}'.format(n)
                    ])
                    header.append(' ' + colrs[d] + fmts[d](lbl[2:])[:4] +
                                  Colors.PLAINTEXT)

            # cross-POS confusions
            gconfsC = Counter([
                p for (g, p), n in conf.most_common()
                if g and p and g.startswith(d) for i in range(n)
            ])
            # Index access instead of a tuple-unpacking lambda: that syntax
            # was removed in Python 3 (PEP 3113) and is a SyntaxError there.
            for lbl, n in sorted(gconfsC.most_common(),
                                 key=lambda item: not item[0].startswith(d)):
                if lbl not in lbls:
                    lbls.append(lbl)
                    #matrix.append([colrs[d2]+'{: >15}'.format(lbl)+Colors.PLAINTEXT+' {:5}'.format(n)])
                    header.append(' ' + colrs[lbl[:2]] +
                                  fmts[lbl[:2]](lbl[2:])[:4] +
                                  Colors.PLAINTEXT)
                    # since this label is for the other part of speech, show as a column (predicted) but not a row (gold)
            header.append(' <-- PRED')

            # matrix content
            if not conf:
                print(
                    Colors.RED +
                    'No gold or predicted supersenses found: check that the input is in the right format. Exiting.'
                    + Colors.RED + Colors.ENDC)
                sys.exit(1)

            offdiag = [
                n for (g, p), n in conf.most_common()
                if (g is None or g.startswith(d)) and g != p
            ]
            # With no off-diagonal confusion for this POS the value is never
            # used below (no cell has v>0 and i!=j), so any non-zero
            # placeholder avoids the IndexError of indexing an empty list.
            nondiag_max = offdiag[0] if offdiag else 1

            for i, g in enumerate(lbls):
                if i >= len(matrix):
                    continue
                for j, p in enumerate(lbls):
                    while len(matrix[i]) <= j + 1:
                        matrix[i].append('')
                    v = conf[g, p]
                    #if v>0 or i==j:
                    #    print(v, g,p, int((v-1)/nondiag_max*len(SPECTRUM)), nondiag_max)
                    colr = (SPECTRUM[int(
                        (v - 1) / nondiag_max * len(SPECTRUM))]
                            if v > 0 and i != j else Colors.PLAINTEXT)
                    matrix[i][j + 1] = colr + ' {:4}'.format(
                        conf[g, p] or '') + Colors.PLAINTEXT

            print(''.join(header))
            for ln in matrix:
                print(''.join(ln))
            print()

    # supersense scores
    print(syspad + ' Acc | P | R | F || R: NSST | VSST ')
    for sststats, sysprefix in zip(sststatsCs, sysprefixes):
        parts = [(' {1}{0:.2%}'.format(
            float(sststats['Exact Tag']['Acc']),
            relativeColor(sststats['Exact Tag']['Acc'],
                          sststatsCs[0]['Exact Tag']['Acc'])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].numeratorS),
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].denominatorS))]
        parts += [
            (' {1}{0:.2%}'.format(
                float(sststats[None][x]),
                relativeColor(sststats[None][x], sststatsCs[0][None][x])) +
             Colors.PLAINTEXT,
             '{:>7}'.format(sststats[None][x].numeratorS),
             '{:>7}'.format(sststats[None][x].denominatorS))
            for x in ('P', 'R')
        ]
        parts += [(' {1}{0:.2%} '.format(
            float(sststats[None]['F']),
            relativeColor(sststats[None]['F'], sststatsCs[0][None]['F'])) +
                   Colors.PLAINTEXT, ' ', ' ')]
        parts += [
            (' {1}{0:.2%}'.format(
                float(sststats[y]['R']),
                relativeColor(sststats[y]['R'], sststatsCs[0][y]['R'])) +
             Colors.PLAINTEXT,
             '{:>7}'.format(sststats[y]['R'].numeratorS),
             '{:>7}'.format(sststats[y]['R'].denominatorS))
            for y in ('n', 'v')
        ]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()

    # combined acc, P, R, F
    print(syspad + ' Acc | P | R | F ')
    cstatsBL = None
    # Iterate the MWE stats together with the supersense stats so each row
    # combines the numbers of the same system (previously every row reused
    # the MWE stats of the last system).
    for stats, sststats, sysprefix in zip(statsCs, sststatsCs, sysprefixes):
        cstats = Counter()
        cstats['Acc'] = Ratio(stats['nFullTagCorrect'], nToks)
        cstats['P'] = Ratio(
            stats['P'].numerator + sststats[None]['P'].numerator,
            stats['P'].denominator + sststats[None]['P'].denominator)
        cstats['R'] = Ratio(
            stats['R'].numerator + sststats[None]['R'].numerator,
            stats['R'].denominator + sststats[None]['R'].denominator)
        cstats['F'] = f1(cstats['P'], cstats['R'])
        if cstatsBL is None:
            cstatsBL = cstats
        parts = [(' {1}{0:.2%}'.format(
            float(cstats[x]), relativeColor(cstats[x], cstatsBL[x])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      cstats[x], (float, int)) else cstats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      cstats[x], (float, int)) else cstats[x].denominatorS))
                 for x in ('Acc', 'P', 'R', 'F')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))

    if len(predFs) == 1:
        print()
        print('SUMMARY SCORES')
        print('==============')
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.YELLOW + r'\1' + Colors.PLAINTEXT,
                'MWEs: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                    stats=stats, f=float(stats['F']))))
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.PINK + r'\1' + Colors.PLAINTEXT,
                'Supersenses: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                    stats=sststats[None], f=float(sststats[None]['F']))))
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.GREEN + r'\1' + Colors.PLAINTEXT,
                'Combined: Acc={stats[Acc]} P={stats[P]} R={stats[R]} F={f:.2%}'
                .format(stats=cstats, f=float(cstats['F']))))

    # restore the terminal's default colors
    print(Colors.ENDC, end='')

    return np.array(
        [float(stats['F']),
         float(sststats[None]['F']),
         float(cstats['F'])])
print(color_render(words, pdata["_"], [], {k + 1: v for k, v in plbls.items()}), file=sys.stderr) try: mweval_sent(zip(words, gtags_mwe, ptags_mwe), gdata["_"], pdata["_"], gmwetypes, pmwetypes, stats, indata=(gdata, pdata)) ssteval_sent(words, glbls, plbls, sststats, conf) except AssertionError as ex: print(render(words, gdata["_"], [])) print(render(words, pdata["_"], [])) raise ex # loaded all files and sentences. gmwetypes = gmwetypesCs[0] sysprefixes = [('SYS{:0' + str(len(str(len(predFs)))) + '} ').format(i + 1) if len(predFs) > 1 else '' for i in range(len(predFs))] syspad = ' ' * len(sysprefixes[0]) # MWE stats print( syspad + ' P | R | F | EP | ER | EF | Acc | O | non-O | ingap | B vs I'