Example #1
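This example evaluates every translation engine on each evaluation dataset: it batches the engine outputs against a reference file, computes METEOR scores in a single run, and pickles both the per-line scores and the per-engine averages.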
import os
import pickle

# Score and OUTPUT_DIR come from the project's score module; the EVAL_*
# constants and the helpers used below are assumed to be defined alongside it.
import score
from score import Score


def evaluate_engine_scores():
    # Merge the per-fold outputs into one file per dataset and engine.
    concat_cross_validation_folds()
    ref_scores = {}
    avg_scores = {}

    print('Average scores:')
    for dataset in EVAL_DATASETS:
        print('\t {0} dataset:'.format(dataset))
        ref_scores[dataset] = {}
        avg_scores[dataset] = {}
        path = os.path.join(EVAL_IN_DIR, dataset)
        s = Score(path)
        test_list = [
            '{0}-{1}.txt'.format(path, engine) for engine in EVAL_ENGINES
        ]
        ref_file = path + '-ref.txt'

        # Batch every engine's output against the single reference file,
        # run METEOR once over the batch, then split the scores back out
        # per engine.
        (file1, file2, line_counts) = s.BatchFiles(test_list, [ref_file])
        output_file = s.ComputeMeteorScores(file1, file2, n_refs=4)
        results = s.UnbatchResults(output_file, test_list, [ref_file],
                                   line_counts)
        for i, r in enumerate(results):
            if len(r) != 1:
                Die('Multiple results for only 1 reference')
            engine_name = EVAL_ENGINES[i]
            # r[0] holds the per-line scores for this engine.
            ref_scores[dataset][engine_name] = r[0]
            avg_scores[dataset][engine_name] = sum(r[0]) / len(r[0])
            print('\t\t {0} engine: {1}'.format(
                engine_name, avg_scores[dataset][engine_name]))

    out_dir = os.path.join(score.OUTPUT_DIR, 'results')
    os.makedirs(out_dir, exist_ok=True)
    # Pickle files are binary, so open them in 'wb' mode.
    with open(os.path.join(out_dir, 'ref_scores.pkl'), 'wb') as f:
        pickle.dump(ref_scores, f)
    with open(os.path.join(out_dir, 'avg_scores.pkl'), 'wb') as f:
        pickle.dump(avg_scores, f)
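
For context, a minimal sketch of the surrounding definitions the example assumes. The directory, dataset, and engine names below are hypothetical placeholders; in the original project these values and helpers live alongside the score module:

import sys

# Hypothetical placeholders for the assumed project-level configuration.
EVAL_IN_DIR = 'eval/input'
EVAL_DATASETS = ['dev', 'test']
EVAL_ENGINES = ['engine_a', 'engine_b']

def Die(message):
    # Assumed helper: abort evaluation with an error message.
    sys.exit(message)

def concat_cross_validation_folds():
    # Assumed helper: merges per-fold engine outputs into the
    # per-dataset files scored above.
    pass

if __name__ == '__main__':
    evaluate_engine_scores()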