コード例 #1
0
def main():
    """Command-line entry point for the evaluation script.

    Parses the command-line options, hands readers for the reference
    (``--ref``) and system output (``--out``) files to ``evaluate`` and then
    reports the scores:

    * when ``--mtevaldir`` is non-empty, the results are passed on to
      ``mtscore`` together with the work directory;
    * otherwise a two-line ``<outprefix>.summary.score`` file (header plus
      accuracy, word accuracy and recall) is written and the same two lines
      are logged.

    ``<outprefix>`` is the ``--out`` path with its final extension removed.
    """
    # Local import: only needed here, to strip the extension safely.
    import os

    parser = argparse.ArgumentParser(description="Evaluation")
    parser.add_argument('--mtevaldir',type=str, help="Path to MT evaluation scripts",action='store',default="")
    parser.add_argument('--ref',type=str,help='Reference file', action='store',required=True)
    parser.add_argument('--out',type=str,help='Output file', action='store',required=True)
    parser.add_argument('--workdir','-w',type=str,help='Work directory', action='store',default=".")
    parser.add_argument('-i',dest='casesensitive',help='Measure translation accuracy without regard for case',action='store_false',default=True)
    parser.add_argument('-a',dest='oof',help='Out of five evaluation, considers up to four additional alternatives in system output',action='store_true',default=False)
    #parser.add_argument('-C',dest='forcecontext',help='Force context from input, even if system-supplied context is different',action='store_true',default=False)
    parser.add_argument('-I',dest='ignoreinputmismatch',help='Ignore input mismatch',action='store_true',default=False)
    args = parser.parse_args()

    totalavgaccuracy, totalwordavgaccuracy, totalavgrecall, matrexsrcfile, matrextgtfile, matrexoutfile = evaluate(Reader(args.ref), Reader(args.out), args.mtevaldir, args.workdir, args.casesensitive, args.oof, args.ignoreinputmismatch)

    # splitext only strips the extension of the last path component, so
    # extension-less names ("out") and dotted directories ("./res/out",
    # "dir.v2/out") keep a usable prefix instead of collapsing to "" or to a
    # truncated directory name.
    outprefix = os.path.splitext(args.out)[0]

    if args.mtevaldir:
        mtscore(args.mtevaldir, matrexsrcfile, matrextgtfile, matrexoutfile, totalavgaccuracy, totalwordavgaccuracy, totalavgrecall, outprefix, args.workdir)
    else:
        # NOTE: unlike mtscore, this path is not prefixed with the work
        # directory; the summary lands relative to the current directory.
        # The context manager closes the file even if a write or log fails.
        with io.open(outprefix + '.summary.score','w') as f:
            s = "Accuracy Word-Accuracy Recall"
            f.write(s+ "\n")
            log(s)
            s = str(totalavgaccuracy) + " " + str(totalwordavgaccuracy) + " " + str(totalavgrecall)
            f.write(s + "\n")
            log(s)
コード例 #2
0
def mtscore(mtevaldir,
            sourcexml,
            refxml,
            targetxml,
            totalavgaccuracy,
            totalwordavgaccuracy,
            totalavgrecall,
            outprefix,
            WORKDIR='.'):
    """Run the external MT evaluation tools and write a score summary.

    For each of BLEU, WER, PER, METEOR, MTEVAL (NIST and BLEU) and TER the
    corresponding script/jar is looked up under ``mtevaldir``.  If it exists
    it is run through ``runcmd`` with its output redirected to
    ``<outprefix>.<metric>.score``, and that file is then parsed from
    ``WORKDIR/<outprefix>.<metric>.score``.  Missing tools are skipped with a
    log message and their score stays ``0``.

    Once any step has failed, later tools are still executed but their
    output files are no longer parsed.

    Finally the header and the nine scores (the three passed-in averages plus
    BLEU, METEOR, NIST, TER, WER, PER) are written to
    ``WORKDIR/<outprefix>.summary.score`` and logged.

    Args:
        mtevaldir: Directory containing the evaluation scripts.
        sourcexml: Source file passed to the tools via ``-s``.
        refxml: Reference file passed via ``-r``.
        targetxml: System output file passed via ``-t`` (``-h`` for TER).
        totalavgaccuracy: Value written to the "Accuracy" column.
        totalwordavgaccuracy: Value written to the "Word-Accuracy" column.
        totalavgrecall: Value written to the "Recall" column.
        outprefix: Prefix for all generated ``*.score`` files.
        WORKDIR: Directory from which score files are read and where the
            summary is written.

    Returns:
        ``True`` if no command failed and no score file failed to parse,
        ``False`` otherwise.
    """
    # NOTE(review): the commands redirect to a path relative to the current
    # directory while results are read from under WORKDIR; these only agree
    # if the commands run with WORKDIR as working directory -- confirm
    # against runcmd / the caller.

    per = 0
    wer = 0
    bleu = 0
    meteor = 0
    nist = 0
    ter = 0

    EXEC_MATREX_WER = mtevaldir + '/eval/WER_v01.pl'
    EXEC_MATREX_PER = mtevaldir + '/eval/PER_v01.pl'
    EXEC_MATREX_BLEU = mtevaldir + '/eval/bleu-1.04.pl'
    EXEC_MATREX_METEOR = mtevaldir + '/meteor-0.6/meteor.pl'
    EXEC_MATREX_MTEVAL = mtevaldir + '/mteval-v11b.pl'
    EXEC_MATREX_TER = mtevaldir + '/tercom.jar'
    EXEC_PERL = 'perl'
    EXEC_JAVA = 'java'

    errors = False

    # ---- BLEU ------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_BLEU):
        if not runcmd(
                EXEC_PERL + ' ' + EXEC_MATREX_BLEU + " -r " + refxml + ' -t ' +
                targetxml + ' -s ' + sourcexml + ' -ci > ' + outprefix +
                '.bleu.score', 'Computing BLEU score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.bleu.score') as f:
                    for line in f:
                        if line[0:9] == "BLEUr1n4,":
                            bleu = float(line[10:].strip())
                            print("BLEU score: ", bleu, file=sys.stderr)
            except Exception as e:
                log("Error reading bleu.score:" + str(e), red)
                errors = True
    else:
        log("Skipping BLEU (no script found [" + EXEC_MATREX_BLEU + "])",
            yellow)

    # ---- WER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_WER):
        if not runcmd(
                EXEC_PERL + ' ' + EXEC_MATREX_WER + " -r " + refxml + ' -t ' +
                targetxml + ' -s ' + sourcexml + '  > ' + outprefix +
                '.wer.score', 'Computing WER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.wer.score',
                             'r',
                             encoding='utf-8') as f:
                    for line in f:
                        if line[0:11] == "WER score =":
                            wer = float(line[12:19].strip())
                            log("WER score: " + str(wer), white)
            except Exception as e:
                log("Error reading wer.score:" + str(e), red)
                errors = True
    else:
        log("Skipping WER (no script found [" + EXEC_MATREX_WER + "]) ",
            yellow)

    # ---- PER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_PER):
        if not runcmd(
                EXEC_PERL + ' ' + EXEC_MATREX_PER + " -r " + refxml + ' -t ' +
                targetxml + ' -s ' + sourcexml + '  > ' + outprefix +
                '.per.score', 'Computing PER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.per.score',
                             'r',
                             encoding='utf-8') as f:
                    for line in f:
                        if line[0:11] == "PER score =":
                            per = float(line[12:19].strip())
                            log("PER score: " + str(per), white)
            except Exception as e:
                log("Error reading per.score" + str(e), red)
                errors = True
    else:
        log("Skipping PER (no script found [" + EXEC_MATREX_PER + "])", yellow)

    # ---- METEOR ----------------------------------------------------------
    if os.path.exists(EXEC_MATREX_METEOR):
        if not runcmd(
                EXEC_PERL + ' -I ' + os.path.dirname(EXEC_MATREX_METEOR) +
                ' ' + EXEC_MATREX_METEOR + " -s colibrita -r " + refxml +
                ' -t ' + targetxml + ' --modules "exact"  > ' + outprefix +
                '.meteor.score', 'Computing METEOR score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.meteor.score',
                             'r',
                             encoding='utf-8') as f:
                    for line in f:
                        if line[0:6] == "Score:":
                            meteor = float(line[7:].strip())
                            log("METEOR score: " + str(meteor), white)
            except Exception as e:
                log("Error reading meteor.score:" + str(e), red)
                errors = True
    else:
        log("Skipping METEOR (no script found [" + EXEC_MATREX_METEOR + "])",
            yellow)

    # ---- MTEVAL (NIST & BLEU) --------------------------------------------
    if os.path.exists(EXEC_MATREX_MTEVAL):
        if not runcmd(
                EXEC_PERL + ' ' + EXEC_MATREX_MTEVAL + " -r " + refxml +
                ' -t ' + targetxml + ' -s ' + sourcexml + '  > ' + outprefix +
                '.mteval.score', 'Computing NIST & BLEU scores'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.mteval.score',
                             'r',
                             encoding='utf-8') as f:
                    for line in f:
                        if line[0:12] == "NIST score =":
                            nist = float(line[13:21].strip())
                            # Format the value into the message and pass a
                            # colour, like the other metrics do (the score
                            # itself used to be passed as the colour).
                            log("NIST score: " + str(nist), white)
                        # The BLEU value sits at a fixed column on the same
                        # line layout as the NIST value.
                        if line[21:33] == "BLEU score =":
                            bleu2 = float(line[34:40].strip())
                            if bleu == 0:
                                # No BLEU from the dedicated script: use this.
                                bleu = bleu2
                                log("BLEU score: " + str(bleu), white)
                            elif abs(bleu - bleu2) > 0.01:
                                log("blue score from MTEVAL scripts differs too much: "
                                    + str(bleu) + " vs " + str(bleu2) +
                                    ", choosing highest score")
                                if bleu2 > bleu:
                                    bleu = bleu2
                            else:
                                log("BLEU score (not stored): " + str(bleu2))
            except Exception as e:
                log("Error reading mteval.score: " + str(e), red)
                errors = True
    else:
        log("Skipping MTEVAL (BLEU & NIST) (no script found)", yellow)

    # ---- TER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_TER):
        if not runcmd(
                EXEC_JAVA + ' -jar ' + EXEC_MATREX_TER + " -r " + refxml +
                ' -h ' + targetxml + '  > ' + outprefix + '.ter.score',
                'Computing TER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.ter.score',
                             'r',
                             encoding='utf-8') as f:
                    for line in f:
                        if line[0:10] == "Total TER:":
                            ter = float(line[11:].strip().split(' ')[0])
                            log("TER score: " + str(ter), white)
            except Exception as e:
                log("Error reading ter.score: " + str(e), red)
                # Report the failure through the return value, consistent
                # with every other metric above.
                errors = True
    else:
        log("Skipping TER (no script found)", yellow)

    # ---- Summary ---------------------------------------------------------
    log("SCORE SUMMARY\n===================\n")
    with io.open(WORKDIR + '/' + outprefix + '.summary.score', 'w') as f:
        s = "Accuracy Word-Accuracy Recall BLEU METEOR NIST TER WER PER"
        f.write(s + "\n")
        log(s)
        s = str(totalavgaccuracy) + " " + str(totalwordavgaccuracy) + " " + str(
            totalavgrecall) + " " + str(bleu) + " " + str(meteor) + " " + str(
                nist) + " " + str(ter) + " " + str(wer) + " " + str(per)
        f.write(s + "\n")
        log(s)

    return not errors
コード例 #3
0
def main():
    """Command-line entry point for the evaluation script.

    Parses the command-line options, hands readers for the reference
    (``--ref``) and system output (``--out``) files to ``evaluate`` and then
    reports the scores:

    * when ``--mtevaldir`` is non-empty, the results are passed on to
      ``mtscore`` together with the work directory;
    * otherwise a two-line ``<outprefix>.summary.score`` file (header plus
      accuracy, word accuracy and recall) is written and the same two lines
      are logged.

    ``<outprefix>`` is the ``--out`` path with its final extension removed.
    """
    # Local import: only needed here, to strip the extension safely.
    import os

    parser = argparse.ArgumentParser(description="Evaluation")
    parser.add_argument('--mtevaldir',
                        type=str,
                        help="Path to MT evaluation scripts",
                        action='store',
                        default="")
    parser.add_argument('--ref',
                        type=str,
                        help='Reference file',
                        action='store',
                        required=True)
    parser.add_argument('--out',
                        type=str,
                        help='Output file',
                        action='store',
                        required=True)
    parser.add_argument('--workdir',
                        '-w',
                        type=str,
                        help='Work directory',
                        action='store',
                        default=".")
    parser.add_argument(
        '-i',
        dest='casesensitive',
        help='Measure translation accuracy without regard for case',
        action='store_false',
        default=True)
    parser.add_argument(
        '-a',
        dest='oof',
        help=
        'Out of five evaluation, considers up to four additional alternatives in system output',
        action='store_true',
        default=False)
    #parser.add_argument('-C',dest='forcecontext',help='Force context from input, even if system-supplied context is different',action='store_true',default=False)
    parser.add_argument('-I',
                        dest='ignoreinputmismatch',
                        help='Ignore input mismatch',
                        action='store_true',
                        default=False)
    args = parser.parse_args()

    totalavgaccuracy, totalwordavgaccuracy, totalavgrecall, matrexsrcfile, matrextgtfile, matrexoutfile = evaluate(
        Reader(args.ref), Reader(args.out), args.mtevaldir, args.workdir,
        args.casesensitive, args.oof, args.ignoreinputmismatch)

    # splitext only strips the extension of the last path component, so
    # extension-less names ("out") and dotted directories ("./res/out",
    # "dir.v2/out") keep a usable prefix instead of collapsing to "" or to a
    # truncated directory name.
    outprefix = os.path.splitext(args.out)[0]

    if args.mtevaldir:
        mtscore(args.mtevaldir, matrexsrcfile, matrextgtfile, matrexoutfile,
                totalavgaccuracy, totalwordavgaccuracy, totalavgrecall,
                outprefix, args.workdir)
    else:
        # NOTE: unlike mtscore, this path is not prefixed with the work
        # directory; the summary lands relative to the current directory.
        # The context manager closes the file even if a write or log fails.
        with io.open(outprefix + '.summary.score', 'w') as f:
            s = "Accuracy Word-Accuracy Recall"
            f.write(s + "\n")
            log(s)
            s = str(totalavgaccuracy) + " " + str(
                totalwordavgaccuracy) + " " + str(totalavgrecall)
            f.write(s + "\n")
            log(s)
コード例 #4
0
def mtscore(mtevaldir, sourcexml, refxml, targetxml, totalavgaccuracy, totalwordavgaccuracy, totalavgrecall, outprefix, WORKDIR = '.'):
    """Run the external MT evaluation tools and write a score summary.

    For each of BLEU, WER, PER, METEOR, MTEVAL (NIST and BLEU) and TER the
    corresponding script/jar is looked up under ``mtevaldir``.  If it exists
    it is run through ``runcmd`` with its output redirected to
    ``<outprefix>.<metric>.score``, and that file is then parsed from
    ``WORKDIR/<outprefix>.<metric>.score``.  Missing tools are skipped with a
    log message and their score stays ``0``.

    Once any step has failed, later tools are still executed but their
    output files are no longer parsed.

    Finally the header and the nine scores (the three passed-in averages plus
    BLEU, METEOR, NIST, TER, WER, PER) are written to
    ``WORKDIR/<outprefix>.summary.score`` and logged.

    Args:
        mtevaldir: Directory containing the evaluation scripts.
        sourcexml: Source file passed to the tools via ``-s``.
        refxml: Reference file passed via ``-r``.
        targetxml: System output file passed via ``-t`` (``-h`` for TER).
        totalavgaccuracy: Value written to the "Accuracy" column.
        totalwordavgaccuracy: Value written to the "Word-Accuracy" column.
        totalavgrecall: Value written to the "Recall" column.
        outprefix: Prefix for all generated ``*.score`` files.
        WORKDIR: Directory from which score files are read and where the
            summary is written.

    Returns:
        ``True`` if no command failed and no score file failed to parse,
        ``False`` otherwise.
    """
    # NOTE(review): the commands redirect to a path relative to the current
    # directory while results are read from under WORKDIR; these only agree
    # if the commands run with WORKDIR as working directory -- confirm
    # against runcmd / the caller.

    per = 0
    wer = 0
    bleu = 0
    meteor = 0
    nist = 0
    ter = 0

    EXEC_MATREX_WER = mtevaldir + '/eval/WER_v01.pl'
    EXEC_MATREX_PER = mtevaldir + '/eval/PER_v01.pl'
    EXEC_MATREX_BLEU = mtevaldir + '/eval/bleu-1.04.pl'
    EXEC_MATREX_METEOR = mtevaldir + '/meteor-0.6/meteor.pl'
    EXEC_MATREX_MTEVAL = mtevaldir + '/mteval-v11b.pl'
    EXEC_MATREX_TER = mtevaldir + '/tercom.jar'
    EXEC_PERL = 'perl'
    EXEC_JAVA = 'java'

    errors = False

    # ---- BLEU ------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_BLEU):
        if not runcmd(EXEC_PERL + ' ' + EXEC_MATREX_BLEU + " -r " + refxml + ' -t ' + targetxml + ' -s ' + sourcexml + ' -ci > ' + outprefix + '.bleu.score',  'Computing BLEU score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.bleu.score') as f:
                    for line in f:
                        if line[0:9] == "BLEUr1n4,":
                            bleu = float(line[10:].strip())
                            print("BLEU score: ", bleu, file=sys.stderr)
            except Exception as e:
                log("Error reading bleu.score:" + str(e),red)
                errors = True
    else:
        log("Skipping BLEU (no script found ["+EXEC_MATREX_BLEU+"])",yellow)

    # ---- WER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_WER):
        if not runcmd(EXEC_PERL + ' ' + EXEC_MATREX_WER + " -r " + refxml + ' -t ' + targetxml + ' -s ' + sourcexml + '  > ' + outprefix + '.wer.score', 'Computing WER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.wer.score','r',encoding='utf-8') as f:
                    for line in f:
                        if line[0:11] == "WER score =":
                            wer = float(line[12:19].strip())
                            log("WER score: " + str(wer), white)
            except Exception as e:
                log("Error reading wer.score:" + str(e),red)
                errors = True
    else:
        log("Skipping WER (no script found ["+EXEC_MATREX_WER+"]) ",yellow)

    # ---- PER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_PER):
        if not runcmd(EXEC_PERL + ' ' + EXEC_MATREX_PER + " -r " + refxml + ' -t ' + targetxml + ' -s ' + sourcexml + '  > ' + outprefix + '.per.score',  'Computing PER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix +'.per.score','r',encoding='utf-8') as f:
                    for line in f:
                        if line[0:11] == "PER score =":
                            per = float(line[12:19].strip())
                            log("PER score: " + str(per), white)
            except Exception as e:
                log("Error reading per.score" + str(e),red)
                errors = True
    else:
        log("Skipping PER (no script found ["+EXEC_MATREX_PER+"])",yellow)

    # ---- METEOR ----------------------------------------------------------
    if os.path.exists(EXEC_MATREX_METEOR):
        if not runcmd(EXEC_PERL + ' -I ' + os.path.dirname(EXEC_MATREX_METEOR) + ' ' + EXEC_MATREX_METEOR + " -s colibrita -r " + refxml + ' -t ' + targetxml + ' --modules "exact"  > ' + outprefix + '.meteor.score',  'Computing METEOR score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.meteor.score','r',encoding='utf-8') as f:
                    for line in f:
                        if line[0:6] == "Score:":
                            meteor = float(line[7:].strip())
                            log("METEOR score: " + str(meteor), white)
            except Exception as e:
                log("Error reading meteor.score:" + str(e),red)
                errors = True
    else:
        log("Skipping METEOR (no script found ["+EXEC_MATREX_METEOR+"])",yellow)

    # ---- MTEVAL (NIST & BLEU) --------------------------------------------
    if os.path.exists(EXEC_MATREX_MTEVAL):
        if not runcmd(EXEC_PERL + ' ' + EXEC_MATREX_MTEVAL + " -r " + refxml + ' -t ' + targetxml + ' -s ' + sourcexml +  '  > ' + outprefix + '.mteval.score',  'Computing NIST & BLEU scores'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR + '/' + outprefix + '.mteval.score','r',encoding='utf-8') as f:
                    for line in f:
                        if line[0:12] == "NIST score =":
                            nist = float(line[13:21].strip())
                            # Format the value into the message and pass a
                            # colour, like the other metrics do (the score
                            # itself used to be passed as the colour).
                            log("NIST score: " + str(nist), white)
                        # The BLEU value sits at a fixed column on the same
                        # line layout as the NIST value.
                        if line[21:33] == "BLEU score =":
                            bleu2 = float(line[34:40].strip())
                            if bleu == 0:
                                # No BLEU from the dedicated script: use this.
                                bleu = bleu2
                                log("BLEU score: " + str(bleu), white)
                            elif abs(bleu - bleu2) > 0.01:
                                log("blue score from MTEVAL scripts differs too much: " + str(bleu) + " vs " + str(bleu2) +  ", choosing highest score")
                                if bleu2 > bleu:
                                    bleu = bleu2
                            else:
                                log("BLEU score (not stored): " + str(bleu2))
            except Exception as e:
                log("Error reading mteval.score: " + str(e),red)
                errors = True
    else:
        log("Skipping MTEVAL (BLEU & NIST) (no script found)", yellow)

    # ---- TER -------------------------------------------------------------
    if os.path.exists(EXEC_MATREX_TER):
        if not runcmd(EXEC_JAVA + ' -jar ' + EXEC_MATREX_TER + " -r " + refxml + ' -h ' + targetxml + '  > ' + outprefix + '.ter.score',  'Computing TER score'):
            errors = True
        if not errors:
            try:
                with io.open(WORKDIR +'/' + outprefix + '.ter.score','r',encoding='utf-8') as f:
                    for line in f:
                        if line[0:10] == "Total TER:":
                            ter = float(line[11:].strip().split(' ')[0])
                            log("TER score: " + str(ter), white)
            except Exception as e:
                log("Error reading ter.score: " + str(e),red)
                # Report the failure through the return value, consistent
                # with every other metric above.
                errors = True
    else:
        log("Skipping TER (no script found)",yellow)


    # ---- Summary ---------------------------------------------------------
    log("SCORE SUMMARY\n===================\n")
    with io.open(WORKDIR + '/' + outprefix + '.summary.score','w') as f:
        s = "Accuracy Word-Accuracy Recall BLEU METEOR NIST TER WER PER"
        f.write(s+ "\n")
        log(s)
        s = str(totalavgaccuracy) + " " + str(totalwordavgaccuracy) + " " + str(totalavgrecall) + " " + str(bleu) + " " + str(meteor) + " " + str(nist)  + " " + str(ter) + " " + str(wer)  + " " + str(per)
        f.write(s + "\n")
        log(s)


    return not errors