コード例 #1
0
def histograms(data_sets, file_path, fig_fname=None):
    """
    Plot one histogram of the "gold" scores per data set in a single figure.

    Parameters
    ----------
    data_sets : iterable of str
        Data set names. Each name is substituted into `file_path` and used
        as the title of its subplot.
    file_path : str
        Format template. It is formatted once with ("input", name) and once
        with ("gs", name) to obtain the two file names passed to `read`.
    fig_fname : str, optional
        If truthy, the figure is saved to this file; otherwise it is shown
        on screen.

    Notes
    -----
    Subplots are laid out three per row. Up to six data sets use the
    original 2x3 grid; more data sets add extra rows instead of failing.
    """
    # materialise so the grid size can be derived from the number of data sets
    data_sets = list(data_sets)
    n_cols = 3
    # never fewer than 2 rows, so the layout for <= 6 data sets is unchanged
    n_rows = max(2, (len(data_sets) + n_cols - 1) // n_cols)

    fig = plt.figure()
    # Bin edges are the same for every subplot. The step is slightly larger
    # than 0.5, presumably so that scores lying exactly on a multiple of 0.5
    # fall into the lower bin -- TODO confirm intent.
    bins = np.arange(0, 6, 0.5000001)

    for ax_n, data in enumerate(data_sets):
        table = read(file_path.format("input", data),
                     file_path.format("gs", data))

        ax = fig.add_subplot(n_rows, n_cols, ax_n + 1)
        ax.hist(table["gold"], bins=bins, facecolor='green', alpha=0.5)
        ax.set_xlabel("Score")
        ax.set_ylabel("Count")
        ax.set_title(data)
        ax.set_xlim(0, 5)

        ax.grid(True)

    fig.tight_layout()

    if fig_fname:
        # save this figure explicitly rather than whichever one pyplot
        # currently considers "current"
        fig.savefig(fig_fname)
    else:
        plt.show()
コード例 #2
0
Extract sentence pairs for which the predicted similarity scores by the
TakeLab simple system deviate the most from the gold standard
"""

import codecs
import numpy as np

from sts.io import read
from sts.score import correlation

import matplotlib.pyplot as plt
from numpy.lib.recfunctions import append_fields

# For every data set, write a report of its sentence pairs ordered by how far
# the system output deviates from the gold standard (largest deviation first).
for data in "MSRpar", "MSRvid", "SMTeuroparl", "surprise.OnWN", "surprise.SMTnews":
    table = read("../../data/STS2012-test/STS.input.{}.txt".format(data),
                 "../../data/STS2012-test/STS.gs.{}.txt".format(data),
                 "takelab-out/{}-output.txt".format(data.lower()))

    # add new field "diff" for the absolute difference between gold standard
    # and system output
    diff = abs(table["gold"] - table["output"])
    table = append_fields(table, 'diff', diff, 'f', usemask=False)
    # sort descending on diff: sort ascending in place, then reverse
    table.sort(axis=0, order='diff')
    table = table[::-1]
    # write in "markdown" format for Github wiki; the with-block makes sure
    # the file is flushed and closed before the next data set is processed
    with codecs.open("Errors.TakeLab." + data + ".txt", "w",
                     encoding="utf-8") as f:
        for i, row in enumerate(table):
            f.write("**{}: ".format(i + 1))
            # Fields are addressed by position: 0 and 1 are written as the
            # two bullet items, 2 as gold, 3 as sys, and 4 as diff
            # (assumes `read` yields exactly four fields before the
            # appended "diff" -- TODO confirm).
            f.write(u"diff={0[4]:.4}, gold={0[2]:.4},  sys={0[3]:.4}**\n\n"
                    u"  * {0[0]}\n  * {0[1]}\n\n---\n\n".format(row))
コード例 #3
0
ファイル: errors.py プロジェクト: STS-NTNU/STS13
Extract sentence pairs for which the predicted similarity scores by the
TakeLab simple system deviate the most from the gold standard
"""

import codecs
import numpy as np

from sts.io import read
from sts.score import correlation

import matplotlib.pyplot as plt
from numpy.lib.recfunctions import append_fields


# For every data set, write a report of its sentence pairs ordered by how far
# the system output deviates from the gold standard (largest deviation first).
for data in "MSRpar", "MSRvid", "SMTeuroparl", "surprise.OnWN", "surprise.SMTnews":
    table = read("../../data/STS2012-test/STS.input.{}.txt".format(data),
                 "../../data/STS2012-test/STS.gs.{}.txt".format(data),
                 "takelab-out/{}-output.txt".format(data.lower()))

    # add new field "diff" for the absolute difference between gold standard
    # and system output
    diff = abs(table["gold"] - table["output"])
    table = append_fields(table, 'diff', diff, 'f', usemask=False)
    # sort descending on diff: sort ascending in place, then reverse
    table.sort(axis=0, order='diff')
    table = table[::-1]
    # write in "markdown" format for Github wiki; the with-block makes sure
    # the file is flushed and closed before the next data set is processed
    with codecs.open("Errors.TakeLab." + data + ".txt", "w",
                     encoding="utf-8") as f:
        for i, row in enumerate(table):
            f.write("**{}: ".format(i+1))
            # Fields are addressed by position: 0 and 1 are written as the
            # two bullet items, 2 as gold, 3 as sys, and 4 as diff
            # (assumes `read` yields exactly four fields before the
            # appended "diff" -- TODO confirm).
            f.write(u"diff={0[4]:.4}, gold={0[2]:.4},  sys={0[3]:.4}**\n\n"
                    u"  * {0[0]}\n  * {0[1]}\n\n---\n\n".format(row))