Beispiel #1
0
def color_render(*args, **kwargs):
    """Return the output of ``render`` decorated with terminal color codes.

    All positional and keyword arguments are forwarded unchanged to
    ``render``.  In the resulting string:

    * the default color is ``Colors.YELLOW``;
    * every underscore is shown in ``Colors.PLAINTEXT``;
    * ``|v.<label>`` spans are shown in ``Colors.RED``;
    * ``|n.<label>`` spans are shown in ``Colors.BLUE``.

    The string always ends with ``Colors.PLAINTEXT``.
    """
    # terminal colors
    WORDS = Colors.YELLOW
    VERBS = Colors.RED
    NOUNS = Colors.BLUE
    MWE = Colors.PLAINTEXT

    s = render(*args, **kwargs)
    # Underscores (presumably the MWE joiner emitted by render) get their own
    # color; the text color is restored right after each one.
    c = WORDS + s.replace('_', MWE + '_' + WORDS) + Colors.PLAINTEXT
    # The dot is escaped so that only a literal "|v." / "|n." prefix is
    # treated as a supersense label (an unescaped "." matches any character).
    c = re.sub(r'(\|v\.\w+)', VERBS + r'\1' + WORDS, c)  # verb supersenses
    c = re.sub(r'(\|n\.\w+)', NOUNS + r'\1' + WORDS, c)  # noun supersenses

    return c
Beispiel #2
0
def color_render(*args, **kwargs):
    """Colorize the string produced by ``render`` for terminal display.

    Every argument is passed straight through to ``render``.  The returned
    string starts in ``Colors.YELLOW`` and ends with ``Colors.PLAINTEXT``;
    underscores are switched to ``Colors.PLAINTEXT``, ``|v.<label>`` spans to
    ``Colors.RED`` and ``|n.<label>`` spans to ``Colors.BLUE``.
    """
    # terminal colors
    WORDS = Colors.YELLOW
    VERBS = Colors.RED
    NOUNS = Colors.BLUE
    MWE = Colors.PLAINTEXT

    s = render(*args, **kwargs)
    # Each underscore is wrapped so that the word color resumes after it.
    c = WORDS+s.replace('_',MWE+'_'+WORDS)+Colors.PLAINTEXT
    # Escape the dot: unescaped it matches any character, so e.g. "|vXabc"
    # would wrongly be colored as a verb supersense.
    c = re.sub(r'(\|v\.\w+)', VERBS+r'\1'+WORDS, c)   # verb supersenses
    c = re.sub(r'(\|n\.\w+)', NOUNS+r'\1'+WORDS, c)   # noun supersenses

    return c
Beispiel #3
0
            assert all(len(t)<=1 for t in ptags_mwe)
            words, poses = zip(*gdata["words"])
            assert len(words)==len(gtags_mwe)==len(ptags_mwe)
            nToks += len(words)
            stats['nFullTagCorrect'] += sum(1 for k in range(len(words)) if gtags_mwe[k]==ptags_mwe[k] and glbls.get(k)==plbls.get(k))
            if printSents:
                if predFs[0] is predF:
                    print(color_render(words, gdata["_"], [], {k+1: v for k,v in glbls.items()}), file=sys.stderr)
                print(color_render(words, pdata["_"], [], {k+1: v for k,v in plbls.items()}), file=sys.stderr)
            try:
                mweval_sent(zip(words,gtags_mwe,ptags_mwe), gdata["_"], pdata["_"],
                            gmwetypes, pmwetypes, stats, indata=(gdata,pdata))

                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError as ex:
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise ex

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]

    sysprefixes = [('SYS{:0'+str(len(str(len(predFs))))+'}  ').format(i+1) if len(predFs)>1 else '' for i in range(len(predFs))]
    syspad = ' '*len(sysprefixes[0])

    # MWE stats
    print(syspad+'   P   |   R   |   F   |   EP  |   ER  |   EF  |  Acc  |   O   | non-O | ingap | B vs I')
    for stats,conf,pmwetypes,sysprefix in zip(statsCs,confCs,pmwetypesCs,sysprefixes):
        fullAcc = Ratio(stats['nFullTagCorrect'], nToks)

        nTags = stats['correct']+stats['incorrect']
Beispiel #4
0
def _read_first_column(path):
    """Return the set of stripped first tab-separated fields of the lines in ``path``."""
    with open(path) as f:
        return {line.strip().split("\t")[0].strip() for line in f}


def identify(model, args):
    """Mark target tokens and multiword expressions in ``args.file`` and print the result.

    Parameters
    ----------
    model:
        Mapping that may contain the keys ``"p_mwe"`` and ``"non_p_mwe"``
        (used as MWE list / anti-list when no list files are given).  It is
        also handed to ``heuristicSCONJ`` and ``heuristicTO``.
    args:
        Namespace-like object.  Attributes read here: ``file``, ``mwe``,
        ``eval``, ``mwe_list``, ``mwe_anti_list``, ``tp``, ``fp``, ``fn``,
        ``tn``, ``sst``, ``lexcat`` and ``context``.

    Output (all on stdout unless noted)
    -----------------------------------
    * default: the sentence meta lines, then one ``orig<TAB>checkmark`` line
      per token (plus ``<TAB>lexcat`` when set), then a blank line;
    * ``args.sst``: one ``sent_id<TAB>rendered<TAB>json`` line per sentence;
    * ``args.tp`` / ``fp`` / ``fn`` / ``tn`` (without ``args.eval``): matching
      tokens are reported through ``print_target``;
    * ``args.eval``: a confusion table followed by precision, recall and F.
      A ratio whose denominator is zero is reported as ``0.0``.

    The process exits with status 1 (message on stderr) when gold columns are
    required but a token has fewer than 14 fields.
    """
    infile = args.file
    mwe = args.mwe
    evl = args.eval

    # Truthy when gold annotations are needed.  The raw operand value (not a
    # bool) is kept because it is passed on to sentences() as ``conllulex``.
    needs_gold = evl or args.tp or args.fp or args.fn or args.tn
    # Plain per-token output is only produced when no other mode is active.
    plain_output = not (args.sst or needs_gold)

    if args.mwe_list:
        mwe_list = _read_first_column(args.mwe_list)
    elif "p_mwe" in model:
        mwe_list = set(model["p_mwe"]).union(set(PREPS_MASTER))
    else:
        mwe_list = PREPS_MASTER

    non_prep_mwe_list = set()
    if args.mwe_anti_list:
        non_prep_mwe_list = _read_first_column(args.mwe_anti_list)
    elif "non_p_mwe" in model:
        non_prep_mwe_list = model["non_p_mwe"]

    tp, fp, fn, tn = 0, 0, 0, 0

    # NOTE(review): these counts are not read anywhere else in this function;
    # the extra pass over the input is kept so that behaviour is unchanged.
    lemma_pos_counts = {}
    for sent in sentences(infile):
        for token in sent.tokens:
            if token.lemma not in lemma_pos_counts:
                lemma_pos_counts[token.lemma] = defaultdict(int)
            lemma_pos_counts[token.lemma][token.ud_pos] += 1
            lemma_pos_counts[token.lemma][token.ptb_pos] += 1

    # default=0 keeps an empty MWE list from raising ValueError.
    max_mwe_length = max((len(w.split()) for w in mwe_list), default=0)
    print("max MWE length={}".format(max_mwe_length), file=sys.stderr)
    # First words of all multiword entries: only tokens with these lemmas
    # trigger the n-gram lookup below.
    mw_beginners = set([
        w.split()[0] for w in list(mwe_list) + list(non_prep_mwe_list)
        if len(w.split()) >= 2
    ]).union(set(PREP_SPECIAL_MW_BEGINNERS))

    for si, sent in enumerate(sentences(infile, conllulex=needs_gold),
                              start=1):
        if plain_output:
            for metaline in sent.meta:
                print(metaline)

        mwes = []
        mwe_counter = 1

        length = len(sent.tokens)
        i = 0
        k = 0  # tokens before index k are covered by an already matched n-gram
        while i < length:
            token = sent.tokens[i]
            lexcat = ""
            if needs_gold:
                try:
                    xlemma = token.fields[12]
                    supersense = token.fields[13]
                except IndexError:
                    print(
                        "NOTE: the --eval, --tp, --fp, --fn, --tn options works ONLY with full .conllulex format",
                        file=sys.stderr)
                    sys.exit(1)

            # Gold says "target" when the gold supersense starts with "p".
            gold_target = bool(needs_gold and re.match("^p", supersense))

            lemma = token.lemma
            skip = False
            if i >= k and (not needs_gold or supersense != "??"):
                if mwe and token.lemma in mw_beginners:
                    # NOTE(review): candidate spans are tokens[i:j] with
                    # j <= min(length, i + max_mwe_length) - 1, i.e. at most
                    # max_mwe_length - 1 tokens and never including the last
                    # token of the sentence -- confirm this is intended.
                    for j in range(
                            min(length, i + max_mwe_length) - 1, i + 1, -1):
                        ngram = list(sent.tokens[i:j])
                        ngram_lemma = " ".join([t.lemma for t in ngram])
                        if ngram_lemma in non_prep_mwe_list:
                            skip = True
                            k = j
                            break
                        if ngram_lemma in mwe_list:  # find the longest possible mwe
                            mwes.append([int(t.offset) for t in ngram])
                            # "**" flags the first token of a matched MWE.
                            token.checkmark = "{}:{}".format(mwe_counter,
                                                             1) + "**"
                            lemma = ngram_lemma
                            for current_mwe_counter, tok in enumerate(
                                    ngram[1:], start=2):
                                sent.tokens[int(tok.offset) -
                                            1].checkmark = "{}:{}".format(
                                                mwe_counter,
                                                current_mwe_counter)
                            if ngram[-1].ud_pos in ("ADP", "SCONJ"):
                                lexcat = "P"
                            else:
                                lexcat = "PP"
                            mwe_counter += 1
                            k = j
                            break

                if not token.checkmark and not skip:
                    token.checkmark += heuristicADP(token) \
                                       + heuristicPossessive(token, sent) \
                                       + heuristicSCONJ(token, model) \
                                       + heuristicADV(token) \
                                       + heuristicTO(token, sent, model) \
                                       + heuristicForXTo(token, sent)

            first_in_mwe = False
            if token.checkmark.endswith("*"):
                if token.checkmark.endswith("**"):
                    first_in_mwe = True
                else:
                    # Single-word target (presumably flagged by one of the
                    # heuristic* helpers): normalise the mark to one "*".
                    token.checkmark = "*"
                    lexcat = {
                        'PRP$': 'PRON.POSS',
                        'WP$': 'PRON.POSS',
                        'POS': 'POSS',
                        'TO': 'INF.P'
                    }.get(token.ptb_pos, "P")
            elif not (token.checkmark and token.checkmark[0].isdigit()):
                # Neither a target nor a non-initial MWE member ("n:m").
                token.checkmark = "-"

            if not args.lexcat:
                lexcat = ""

            if token.checkmark == "*" or first_in_mwe:
                if gold_target:
                    # exact match
                    if token.lexlemma == lemma:
                        if args.tp and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        tp += 1
                    else:
                        # Target found but with a different lexical lemma:
                        # counted both as a false positive and a false negative.
                        if args.fp and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        fp += 1
                        if args.fn and not evl:
                            print_target(token, sent, i, token.checkmark,
                                         lexcat, args.context)
                        fn += 1
                else:
                    if args.fp and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    fp += 1
            else:
                if gold_target:
                    if args.fn and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    fn += 1
                else:
                    if args.tn and not evl:
                        print_target(token, sent, i, token.checkmark, lexcat,
                                     args.context)
                    tn += 1

            if plain_output:
                print("{}\t{}".format(token.orig, token.checkmark) +
                      ("\t{}".format(lexcat) if lexcat else ""))

            i += 1

        if args.sst:
            _json = {
                "words": [],
                "lemmas": [],
                "tags": [],
                "labels": {},
                "_": mwes,
                "~": [],
            }
            _sent = []
            for tok in sent.tokens:
                _sent.append(tok.word)
                _json["words"].append([tok.word, tok.ptb_pos])
                _json["lemmas"].append(tok.lemma)
                if tok.checkmark.endswith("*"):
                    _json["labels"][tok.offset] = [tok.word, "Locus"]
            print("{}\t{}\t{}".format(
                sent.meta_dict.get(
                    "sent_id",
                    args.file.split("/")[-1].rsplit(".", maxsplit=1)[0] + "." +
                    str(si)),
                tags2sst.render(_sent, _json["_"], []).decode("utf-8"),
                json.dumps(_json)))

        elif not needs_gold:
            print()

    if evl:
        print("\tgold+\tgold-")
        print("auto+\t{}\t{}\t{}".format(tp, fp, tp + fp))
        print("auto-\t{}\t{}\t{}".format(fn, tn, fn + tn))
        print("\t{}\t{}\t{}".format(tp + fn, fp + tn, tp + fp + tn + fn))
        # Guard every ratio: with no predicted or no gold targets the
        # denominators are zero and the scores are reported as 0.0.
        p = tp / (tp + fp) if tp + fp else 0.0
        r = tp / (tp + fn) if tp + fn else 0.0
        f = (2 * p * r) / (p + r) if p + r else 0.0
        print("\nP\tR\tF")
        print("{}\t{}\t{}".format(p, r, f))
Beispiel #5
0
def evaluate(args):
    """Score predicted MWE/supersense analyses against a gold file and print a report.

    ``args`` is a list of strings: optional leading flags followed by the gold
    file path and one or more prediction file paths.

    Flags:
        ``-p``  also print every gold and predicted sentence to stderr.
        ``-C``  disable colors (blanks every public attribute of ``Colors``
                and ``Styles``).

    Every file is read with ``readsents``; the i-th sentence of each
    prediction file is compared with the i-th gold sentence.  The report
    (stdout) contains MWE scores, supersense scores and combined scores per
    system; with exactly one prediction file it additionally contains the raw
    statistics, the supersense confusion matrices and a summary.

    Returns a numpy array ``[MWE F, supersense F, combined F]`` for the last
    prediction file.
    """
    SPECTRUM = [Colors.BLUE,Colors.CYAN,Colors.GREEN,Colors.YELLOW,Colors.ORANGE,Colors.RED,Colors.PINK]

    printSents = False
    while args and args[0].startswith('-'):
        if args[0]=='-p':   # print sentences to stderr
            printSents = True
        elif args[0]=='-C': # turn off colors
            for c in dir(Colors):
                if not c.startswith('_'):
                    setattr(Colors, c, '')
            for s in dir(Styles):
                if not s.startswith('_'):
                    setattr(Styles, s, '')
            SPECTRUM = ['']
        else:
            assert False,'Unexpected option: '+args[0]
        args = args[1:]

    # set up color defaults
    print(Colors.BACKGROUND + Colors.PLAINTEXT, end='')

    nToks = 0   # number of gold tokens (counted once, not once per system)

    goldLblsC = Counter()

    goldFP = args[0]
    print("goldFP: ", goldFP)
    print("predFP: ", args[1])
    predFs = [readsents(fileinput.input(predFP)) for predFP in args[1:]]
    # one accumulator of each kind per prediction file
    statsCs = [Counter() for predFP in args[1:]]
    sststatsCs = [defaultdict(Counter) for predF in args[1:]]
    gmwetypesCs = [Counter() for predFP in args[1:]]    # these will all have the same contents
    pmwetypesCs = [Counter() for predFP in args[1:]]
    confCs = [Counter() for predFP in args[1:]]    # confusion matrix

    for sentId,gdata in readsents(fileinput.input(goldFP)):
        gtags_mwe = [t.encode('utf-8') for t in gdata["tags"]]
        assert all(len(t)<=1 for t in gtags_mwe)
        # label offsets in the data are 1-based; use 0-based token indices here
        glbls = {k-1: v[1].encode('utf-8') for k,v in gdata["labels"].items()}
        goldLblsC.update(glbls.values())
        words, _poses = zip(*gdata["words"])
        # Count gold tokens once per sentence.  Counting inside the per-system
        # loop would multiply the denominator of the full-tag accuracy by the
        # number of systems.
        nToks += len(words)
        for predF,stats,gmwetypes,pmwetypes,sststats,conf in zip(predFs,statsCs,gmwetypesCs,pmwetypesCs,sststatsCs,confCs):
            sentId,pdata = next(predF)
            ptags_mwe = [t.encode('utf-8') for t in pdata["tags"]]
            plbls = {k-1: v[1].encode('utf-8') for k,v in pdata["labels"].items()}
            assert all(len(t)<=1 for t in ptags_mwe)
            assert len(words)==len(gtags_mwe)==len(ptags_mwe)
            # a token is fully correct when both its MWE tag and its label match
            stats['nFullTagCorrect'] += sum(1 for k in range(len(words)) if gtags_mwe[k]==ptags_mwe[k] and glbls.get(k)==plbls.get(k))
            if printSents:
                if predFs[0] is predF:  # print the gold sentence only once
                    print(color_render(words, gdata["_"], [], {k+1: v for k,v in glbls.items()}), file=sys.stderr)
                print(color_render(words, pdata["_"], [], {k+1: v for k,v in plbls.items()}), file=sys.stderr)
            try:
                mweval_sent(zip(words,gtags_mwe,ptags_mwe), gdata["_"], pdata["_"],
                            gmwetypes, pmwetypes, stats, indata=(gdata,pdata))

                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError as ex:
                # show the offending sentence before propagating the failure
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise ex

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]

    # row prefixes "SYS1  ", "SYS2  ", ... (empty when there is a single system)
    sysprefixes = [('SYS{:0'+str(len(str(len(predFs))))+'}  ').format(i+1) if len(predFs)>1 else '' for i in range(len(predFs))]
    syspad = ' '*len(sysprefixes[0])

    # MWE stats
    print(syspad+'   P   |   R   |   F   |   EP  |   ER  |   EF  |  Acc  |   O   | non-O | ingap | B vs I')
    for stats,sststats,conf,pmwetypes,sysprefix in zip(statsCs,sststatsCs,confCs,pmwetypesCs,sysprefixes):
        nTags = stats['correct']+stats['incorrect']
        stats['Acc'] = Ratio(stats['correct'], nTags)
        stats['Tag_R_Oo'] = Ratio(stats['gold_pred_Oo'], stats['gold_Oo'])
        stats['Tag_R_non-Oo'] = Ratio(stats['gold_pred_non-Oo'], stats['gold_non-Oo'])
        stats['Tag_Acc_non-Oo_in-gap'] = Ratio(stats['gold_pred_non-Oo_in-or-out-of-gap_match'], stats['gold_pred_non-Oo'])
        stats['Tag_Acc_non-Oo_B-v-I'] = Ratio(stats['gold_pred_non-Oo_Bb-v-Ii_match'], stats['gold_pred_non-Oo'])
        stats['Tag_Acc_I_strength'] = Ratio(stats['gold_pred_Ii_strength_match'], stats['gold_pred_Ii'])

        stats['P'] = Ratio(stats['PNumer'], stats['PDenom'])
        stats['R'] = Ratio(stats['RNumer'], stats['RDenom'])
        stats['F'] = f1(stats['P'], stats['R'])
        stats['CrossGapP'] = stats['CrossGapPNumer']/stats['CrossGapPDenom'] if stats['CrossGapPDenom']>0 else float('nan')
        stats['CrossGapR'] = stats['CrossGapRNumer']/stats['CrossGapRDenom'] if stats['CrossGapRDenom']>0 else float('nan')
        stats['EP'] = Ratio(stats['ENumer'], stats['EPDenom'])
        stats['ER'] = Ratio(stats['ENumer'], stats['ERDenom'])
        stats['EF'] = f1(stats['EP'], stats['ER'])

        if gmwetypes:
            assert stats['Gold_#Groups']==sum(gmwetypes.values())
            stats['Gold_#Types'] = len(gmwetypes)
        assert stats['Pred_#Groups']==sum(pmwetypes.values())
        stats['Pred_#Types'] = len(pmwetypes)

        if len(predFs)==1:
            print('mwestats = ', dict(stats), ';', sep='')
            print()
            print('sststats = ', dict(sststats), ';', sep='')
            print()
            print('conf = ', dict(conf), ';', sep='')
            print()

        # Each entry is a (percentage, numerator, denominator) triple; the
        # triples are transposed below into three printed rows.  Colors are
        # relative to the first system.
        parts = [(' {1}{0:.2%}'.format(float(stats[x]), relativeColor(stats[x],statsCs[0][x]))+Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(stats[x],(float,int)) else stats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(stats[x],(float,int)) else stats[x].denominatorS)) for x in ('P', 'R', 'F', 'EP', 'ER', 'EF', 'Acc',
                  'Tag_R_Oo', 'Tag_R_non-Oo',
                  'Tag_Acc_non-Oo_in-gap', 'Tag_Acc_non-Oo_B-v-I')]
        for j,pp in enumerate(zip(*parts)):
            print((sysprefix if j==0 else syspad)+' '.join(pp))
    print()

    # Supersense stats
    if len(predFs)==1:
        # supersense confusion matrices
        colrs = {'n.': Colors.RED, 'v.': Colors.BLUE}
        fmts = {'n.': str.upper, 'v.': str.lower}
        for d in ('n.', 'v.'):
            matrix = [['{: >15}'.format('----')+' {:5}'.format(goldLblsC[None] or '')]]
            header = ['           {}GOLD{}      '.format(Styles.UNDERLINE, Styles.NORMAL),' ----']
            lbls = [None]
            for lbl,n in goldLblsC.most_common():
                if lbl.startswith(d):
                    lbls.append(lbl)
                    matrix.append([colrs[d]+'{: >15}'.format(lbl)+Colors.PLAINTEXT+' {:5}'.format(n)])
                    header.append(' '+colrs[d]+fmts[d](lbl[2:])[:4]+Colors.PLAINTEXT)
            # cross-POS confusions
            gconfsC = Counter([p for (g,p),n in conf.most_common() if g and p and g.startswith(d) for i in range(n)])
            # Labels of the current POS sort first.  The key takes the
            # (label, count) pair as a single argument because tuple
            # parameters in lambdas are a syntax error on Python 3.
            for lbl,n in sorted(gconfsC.most_common(), key=lambda item: not item[0].startswith(d)):
                if lbl not in lbls:
                    lbls.append(lbl)
                    header.append(' '+colrs[lbl[:2]]+fmts[lbl[:2]](lbl[2:])[:4]+Colors.PLAINTEXT)
                    # since this label is for the other part of speech, show as a column (predicted) but not a row (gold)

            header.append(' <-- PRED')

            # matrix content
            if not conf:
                print(Colors.RED+'No gold or predicted supersenses found: check that the input is in the right format. Exiting.'+Colors.RED+Colors.ENDC)
                sys.exit(1)
            # Largest off-diagonal count for this POS (most_common() is sorted
            # by count).  The default only applies when there is no
            # off-diagonal entry at all, in which case no cell is colored and
            # the value is never used.
            nondiag_max = next((n for (g,p),n in conf.most_common() if (g is None or g.startswith(d)) and g!=p), 1)

            for i,g in enumerate(lbls):
                if i>=len(matrix): continue     # column-only (other-POS) label
                for j,p in enumerate(lbls):
                    while len(matrix[i])<=j+1:
                        matrix[i].append('')
                    v = conf[g,p]
                    # off-diagonal cells are colored by their size relative to nondiag_max
                    colr = SPECTRUM[int((v-1)/nondiag_max*len(SPECTRUM))] if v>0 and i!=j else Colors.PLAINTEXT
                    matrix[i][j+1] = colr+' {:4}'.format(conf[g,p] or '')+Colors.PLAINTEXT

            print(''.join(header))
            for ln in matrix:
                print(''.join(ln))
            print()

    # supersense scores
    print(syspad+'  Acc  |   P   |   R   |   F   || R: NSST | VSST ')
    for sststats,sysprefix in zip(sststatsCs,sysprefixes):
        parts = [(' {1}{0:.2%}'.format(float(sststats['Exact Tag']['Acc']), relativeColor(sststats['Exact Tag']['Acc'],sststatsCs[0]['Exact Tag']['Acc']))+Colors.PLAINTEXT,
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].numeratorS),
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].denominatorS))]
        parts += [(' {1}{0:.2%}'.format(float(sststats[None][x]), relativeColor(sststats[None][x],sststatsCs[0][None][x]))+Colors.PLAINTEXT,
                   '{:>7}'.format(sststats[None][x].numeratorS),
                   '{:>7}'.format(sststats[None][x].denominatorS)) for x in ('P', 'R')]
        parts += [(' {1}{0:.2%}  '.format(float(sststats[None]['F']), relativeColor(sststats[None]['F'],sststatsCs[0][None]['F']))+Colors.PLAINTEXT,
                   '         ',
                   '         ')]
        parts += [(' {1}{0:.2%}'.format(float(sststats[y]['R']), relativeColor(sststats[y]['R'],sststatsCs[0][y]['R']))+Colors.PLAINTEXT,
                   '{:>7}'.format(sststats[y]['R'].numeratorS),
                   '{:>7}'.format(sststats[y]['R'].denominatorS)) for y in ('n', 'v')]
        for j,pp in enumerate(zip(*parts)):
            print((sysprefix if j==0 else syspad)+' '.join(pp))
    print()

    # combined acc, P, R, F
    print(syspad+'  Acc  |   P   |   R   |   F   ')
    cstatsBL = None     # combined scores of the first system (color baseline)
    # Iterate over the MWE stats together with the supersense stats so that
    # every row combines the numbers of the same system.
    for stats,sststats,sysprefix in zip(statsCs,sststatsCs,sysprefixes):
        cstats = Counter()
        cstats['Acc'] = Ratio(stats['nFullTagCorrect'], nToks)
        cstats['P'] = Ratio(stats['P'].numerator + sststats[None]['P'].numerator,
                            stats['P'].denominator + sststats[None]['P'].denominator)
        cstats['R'] = Ratio(stats['R'].numerator + sststats[None]['R'].numerator,
                            stats['R'].denominator + sststats[None]['R'].denominator)
        cstats['F'] = f1(cstats['P'], cstats['R'])
        if cstatsBL is None:
            cstatsBL = cstats

        parts = [(' {1}{0:.2%}'.format(float(cstats[x]), relativeColor(cstats[x],cstatsBL[x]))+Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(cstats[x],(float,int)) else cstats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(cstats[x],(float,int)) else cstats[x].denominatorS)) for x in ('Acc', 'P', 'R', 'F')]
        for j,pp in enumerate(zip(*parts)):
            print((sysprefix if j==0 else syspad)+' '.join(pp))

    if len(predFs)==1:
        print()
        print('SUMMARY SCORES')
        print('==============')
        # the regex colors the last "=value" of each summary line (the F-score)
        print(re.sub(r'=([^=]+)$', '='+Colors.YELLOW+r'\1'+Colors.PLAINTEXT, 'MWEs: P={stats[P]} R={stats[R]} F={f:.2%}'.format(stats=stats, f=float(stats['F']))))
        print(re.sub(r'=([^=]+)$', '='+Colors.PINK+r'\1'+Colors.PLAINTEXT, 'Supersenses: P={stats[P]} R={stats[R]} F={f:.2%}'.format(stats=sststats[None], f=float(sststats[None]['F']))))
        print(re.sub(r'=([^=]+)$', '='+Colors.GREEN+r'\1'+Colors.PLAINTEXT, 'Combined: Acc={stats[Acc]} P={stats[P]} R={stats[R]} F={f:.2%}'.format(stats=cstats, f=float(cstats['F']))))

    # restore the terminal's default colors
    print(Colors.ENDC, end='')

    # scores of the last system processed
    return np.array([float(stats['F']),float(sststats[None]['F']),float(cstats['F'])])
Beispiel #6
0
def evaluate(args):
    SPECTRUM = [
        Colors.BLUE, Colors.CYAN, Colors.GREEN, Colors.YELLOW, Colors.ORANGE,
        Colors.RED, Colors.PINK
    ]

    printSents = False
    while args and args[0].startswith('-'):
        if args[0] == '-p':  # print sentences to stderr
            printSents = True
        elif args[0] == '-C':  # turn off colors
            for c in dir(Colors):
                if not c.startswith('_'):
                    setattr(Colors, c, '')
            for s in dir(Styles):
                if not s.startswith('_'):
                    setattr(Styles, s, '')
            SPECTRUM = ['']
        else:
            assert False, 'Unexpected option: ' + args[0]
        args = args[1:]

    # set up color defaults
    print(Colors.BACKGROUND + Colors.PLAINTEXT, end='')

    nToks = 0

    goldLblsC = Counter()

    sent = []
    goldFP = args[0]
    print("goldFP: ", goldFP)
    print("predFP: ", args[1])
    predFs = [readsents(fileinput.input(predFP)) for predFP in args[1:]]
    statsCs = [Counter() for predFP in args[1:]]
    sststatsCs = [defaultdict(Counter) for predF in args[1:]]
    gmwetypesCs = [Counter() for predFP in args[1:]
                   ]  # these will all have the same contents
    pmwetypesCs = [Counter() for predFP in args[1:]]
    confCs = [Counter() for predFP in args[1:]]  # confusion matrix

    for sentId, gdata in readsents(fileinput.input(goldFP)):
        gtags_mwe = [t.encode('utf-8') for t in gdata["tags"]]
        #print ("t: ",t, gtags_mwe)
        assert all(len(t) <= 1 for t in gtags_mwe)
        glbls = {
            k - 1: v[1].encode('utf-8')
            for k, v in gdata["labels"].items()
        }
        goldLblsC.update(glbls.values())
        for predF, stats, gmwetypes, pmwetypes, sststats, conf in zip(
                predFs, statsCs, gmwetypesCs, pmwetypesCs, sststatsCs, confCs):
            sentId, pdata = next(predF)
            ptags_mwe = [t.encode('utf-8') for t in pdata["tags"]]
            plbls = {
                k - 1: v[1].encode('utf-8')
                for k, v in pdata["labels"].items()
            }
            assert all(len(t) <= 1 for t in ptags_mwe)
            words, poses = zip(*gdata["words"])
            assert len(words) == len(gtags_mwe) == len(ptags_mwe)
            nToks += len(words)
            stats['nFullTagCorrect'] += sum(1 for k in range(len(words))
                                            if gtags_mwe[k] == ptags_mwe[k]
                                            and glbls.get(k) == plbls.get(k))
            if printSents:
                if predFs[0] is predF:
                    print(color_render(words, gdata["_"], [],
                                       {k + 1: v
                                        for k, v in glbls.items()}),
                          file=sys.stderr)
                print(color_render(words, pdata["_"], [],
                                   {k + 1: v
                                    for k, v in plbls.items()}),
                      file=sys.stderr)
            try:
                mweval_sent(zip(words, gtags_mwe, ptags_mwe),
                            gdata["_"],
                            pdata["_"],
                            gmwetypes,
                            pmwetypes,
                            stats,
                            indata=(gdata, pdata))

                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError as ex:
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise ex

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]

    sysprefixes = [('SYS{:0' + str(len(str(len(predFs)))) +
                    '}  ').format(i + 1) if len(predFs) > 1 else ''
                   for i in range(len(predFs))]
    syspad = ' ' * len(sysprefixes[0])

    # MWE stats
    print(
        syspad +
        '   P   |   R   |   F   |   EP  |   ER  |   EF  |  Acc  |   O   | non-O | ingap | B vs I'
    )
    for stats, conf, pmwetypes, sysprefix in zip(statsCs, confCs, pmwetypesCs,
                                                 sysprefixes):
        fullAcc = Ratio(stats['nFullTagCorrect'], nToks)

        nTags = stats['correct'] + stats['incorrect']
        stats['Acc'] = Ratio(stats['correct'], nTags)
        stats['Tag_R_Oo'] = Ratio(stats['gold_pred_Oo'], stats['gold_Oo'])
        stats['Tag_R_non-Oo'] = Ratio(stats['gold_pred_non-Oo'],
                                      stats['gold_non-Oo'])
        stats['Tag_Acc_non-Oo_in-gap'] = Ratio(
            stats['gold_pred_non-Oo_in-or-out-of-gap_match'],
            stats['gold_pred_non-Oo'])
        stats['Tag_Acc_non-Oo_B-v-I'] = Ratio(
            stats['gold_pred_non-Oo_Bb-v-Ii_match'], stats['gold_pred_non-Oo'])
        stats['Tag_Acc_I_strength'] = Ratio(
            stats['gold_pred_Ii_strength_match'], stats['gold_pred_Ii'])

        stats['P'] = Ratio(stats['PNumer'], stats['PDenom'])
        stats['R'] = Ratio(stats['RNumer'], stats['RDenom'])
        stats['F'] = f1(stats['P'], stats['R'])
        stats['CrossGapP'] = stats['CrossGapPNumer'] / stats[
            'CrossGapPDenom'] if stats['CrossGapPDenom'] > 0 else float('nan')
        stats['CrossGapR'] = stats['CrossGapRNumer'] / stats[
            'CrossGapRDenom'] if stats['CrossGapRDenom'] > 0 else float('nan')
        stats['EP'] = Ratio(stats['ENumer'], stats['EPDenom'])
        stats['ER'] = Ratio(stats['ENumer'], stats['ERDenom'])
        stats['EF'] = f1(stats['EP'], stats['ER'])

        if gmwetypes:
            assert stats['Gold_#Groups'] == sum(gmwetypes.values())
            stats['Gold_#Types'] = len(gmwetypes)
        assert stats['Pred_#Groups'] == sum(pmwetypes.values())
        stats['Pred_#Types'] = len(pmwetypes)

        if len(predFs) == 1:
            print('mwestats = ', dict(stats), ';', sep='')
            print()
            print('sststats = ', dict(sststats), ';', sep='')
            print()
            print('conf = ', dict(conf), ';', sep='')
            print()

        parts = [(' {1}{0:.2%}'.format(float(
            stats[x]), relativeColor(stats[x], statsCs[0][x])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      stats[x], (float, int)) else stats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      stats[x], (float, int)) else stats[x].denominatorS))
                 for x in ('P', 'R', 'F', 'EP', 'ER', 'EF', 'Acc', 'Tag_R_Oo',
                           'Tag_R_non-Oo', 'Tag_Acc_non-Oo_in-gap',
                           'Tag_Acc_non-Oo_B-v-I')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()

    #print(pmwetypes)

    # Supersense stats
    if len(predFs) == 1:
        # supersense confusion matrices
        colrs = {'n.': Colors.RED, 'v.': Colors.BLUE}
        fmts = {'n.': str.upper, 'v.': str.lower}
        for d, d2 in (('n.', 'v.'), ('v.', 'n.')):
            matrix = [[
                '{: >15}'.format('----') +
                ' {:5}'.format(goldLblsC[None] or '')
            ]]
            header = [
                '           {}GOLD{}      '.format(Styles.UNDERLINE,
                                                   Styles.NORMAL), ' ----'
            ]
            lbls = [None]
            for lbl, n in goldLblsC.most_common():
                if lbl.startswith(d):
                    lbls.append(lbl)
                    matrix.append([
                        colrs[d] + '{: >15}'.format(lbl) + Colors.PLAINTEXT +
                        ' {:5}'.format(n)
                    ])
                    header.append(' ' + colrs[d] + fmts[d](lbl[2:])[:4] +
                                  Colors.PLAINTEXT)
            # cross-POS confusions
            gconfsC = Counter([
                p for (g, p), n in conf.most_common()
                if g and p and g.startswith(d) for i in range(n)
            ])
            for lbl, n in sorted(gconfsC.most_common(),
                                 key=lambda (l, lN): not l.startswith(d)):
                if lbl not in lbls:
                    lbls.append(lbl)
                    #matrix.append([colrs[d2]+'{: >15}'.format(lbl)+Colors.PLAINTEXT+' {:5}'.format(n)])
                    header.append(' ' + colrs[lbl[:2]] +
                                  fmts[lbl[:2]](lbl[2:])[:4] +
                                  Colors.PLAINTEXT)
                    # since this label is for the other part of speech, show as a column (predicted) but not a row (gold)

            header.append(' <-- PRED')

            # matrix content
            if not conf:
                print(
                    Colors.RED +
                    'No gold or predicted supersenses found: check that the input is in the right format. Exiting.'
                    + Colors.RED + Colors.ENDC)
                sys.exit(1)
            nondiag_max = [
                n for (g, p), n in conf.most_common()
                if (g is None or g.startswith(d)) and g != p
            ][0]

            for i, g in enumerate(lbls):
                if i >= len(matrix): continue
                for j, p in enumerate(lbls):
                    while len(matrix[i]) <= j + 1:
                        matrix[i].append('')
                    v = conf[g, p]
                    #if v>0 or i==j:
                    #    print(v, g,p, int((v-1)/nondiag_max*len(SPECTRUM)), nondiag_max)
                    colr = SPECTRUM[int(
                        (v - 1) / nondiag_max * len(SPECTRUM)
                    )] if v > 0 and i != j else Colors.PLAINTEXT
                    matrix[i][j + 1] = colr + ' {:4}'.format(
                        conf[g, p] or '') + Colors.PLAINTEXT

            print(''.join(header))
            for ln in matrix:
                print(''.join(ln))
            print()

    # supersense scores
    print(syspad + '  Acc  |   P   |   R   |   F   || R: NSST | VSST ')
    for sststats, sysprefix in zip(sststatsCs, sysprefixes):
        parts = [(' {1}{0:.2%}'.format(
            float(sststats['Exact Tag']['Acc']),
            relativeColor(sststats['Exact Tag']['Acc'],
                          sststatsCs[0]['Exact Tag']['Acc'])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].numeratorS),
                  '{:>7}'.format(sststats['Exact Tag']['Acc'].denominatorS))]
        parts += [
            (' {1}{0:.2%}'.format(
                float(sststats[None][x]),
                relativeColor(sststats[None][x], sststatsCs[0][None][x])) +
             Colors.PLAINTEXT, '{:>7}'.format(sststats[None][x].numeratorS),
             '{:>7}'.format(sststats[None][x].denominatorS))
            for x in ('P', 'R')
        ]
        parts += [(' {1}{0:.2%}  '.format(
            float(sststats[None]['F']),
            relativeColor(sststats[None]['F'], sststatsCs[0][None]['F'])) +
                   Colors.PLAINTEXT, '         ', '         ')]
        parts += [
            (' {1}{0:.2%}'.format(
                float(sststats[y]['R']),
                relativeColor(sststats[y]['R'], sststatsCs[0][y]['R'])) +
             Colors.PLAINTEXT, '{:>7}'.format(sststats[y]['R'].numeratorS),
             '{:>7}'.format(sststats[y]['R'].denominatorS)) for y in ('n', 'v')
        ]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))
    print()

    # combined acc, P, R, F
    print(syspad + '  Acc  |   P   |   R   |   F   ')
    cstatsBL = None
    for sststats, sysprefix in zip(sststatsCs, sysprefixes):
        cstats = Counter()
        cstats['Acc'] = fullAcc
        cstats['P'] = Ratio(
            stats['P'].numerator + sststats[None]['P'].numerator,
            stats['P'].denominator + sststats[None]['P'].denominator)
        cstats['R'] = Ratio(
            stats['R'].numerator + sststats[None]['R'].numerator,
            stats['R'].denominator + sststats[None]['R'].denominator)
        cstats['F'] = f1(cstats['P'], cstats['R'])
        if cstatsBL is None:
            cstatsBL = cstats

        parts = [(' {1}{0:.2%}'.format(float(cstats[x]),
                                       relativeColor(cstats[x], cstatsBL[x])) +
                  Colors.PLAINTEXT,
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      cstats[x], (float, int)) else cstats[x].numeratorS),
                  '{:>7}'.format('' if x.endswith('F') or isinstance(
                      cstats[x], (float, int)) else cstats[x].denominatorS))
                 for x in ('Acc', 'P', 'R', 'F')]
        for j, pp in enumerate(zip(*parts)):
            print((sysprefix if j == 0 else syspad) + ' '.join(pp))

    if len(predFs) == 1:
        print()
        print('SUMMARY SCORES')
        print('==============')
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.YELLOW + r'\1' + Colors.PLAINTEXT,
                'MWEs: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                    stats=stats, f=float(stats['F']))))
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.PINK + r'\1' + Colors.PLAINTEXT,
                'Supersenses: P={stats[P]} R={stats[R]} F={f:.2%}'.format(
                    stats=sststats[None], f=float(sststats[None]['F']))))
        print(
            re.sub(
                r'=([^=]+)$', '=' + Colors.GREEN + r'\1' + Colors.PLAINTEXT,
                'Combined: Acc={stats[Acc]} P={stats[P]} R={stats[R]} F={f:.2%}'
                .format(stats=cstats, f=float(cstats['F']))))

    # restore the terminal's default colors
    print(Colors.ENDC, end='')

    return np.array(
        [float(stats['F']),
         float(sststats[None]['F']),
         float(cstats['F'])])
Beispiel #7
0
                print(color_render(words, pdata["_"], [],
                                   {k + 1: v
                                    for k, v in plbls.items()}),
                      file=sys.stderr)
            try:
                mweval_sent(zip(words, gtags_mwe, ptags_mwe),
                            gdata["_"],
                            pdata["_"],
                            gmwetypes,
                            pmwetypes,
                            stats,
                            indata=(gdata, pdata))

                ssteval_sent(words, glbls, plbls, sststats, conf)
            except AssertionError as ex:
                print(render(words, gdata["_"], []))
                print(render(words, pdata["_"], []))
                raise ex

    # loaded all files and sentences.
    gmwetypes = gmwetypesCs[0]

    sysprefixes = [('SYS{:0' + str(len(str(len(predFs)))) +
                    '}  ').format(i + 1) if len(predFs) > 1 else ''
                   for i in range(len(predFs))]
    syspad = ' ' * len(sysprefixes[0])

    # MWE stats
    print(
        syspad +
        '   P   |   R   |   F   |   EP  |   ER  |   EF  |  Acc  |   O   | non-O | ingap | B vs I'