Beispiel #1
0
def compute_gleu(source, references, prediction_path, num_iterations=200):
    """Return sentence-level GLEU scores for one set of predictions.

    Args:
        source: Source input; handed to ``GLEU.load_sources`` and passed
            again as ``source=`` to ``GLEU.run_iterations``.
        references: Reference input handed to ``GLEU.load_references``.
        prediction_path: Hypothesis passed as ``hypothesis=`` to
            ``GLEU.run_iterations``.
        num_iterations: Number of iterations requested from
            ``GLEU.run_iterations``. Defaults to 200, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A numpy array with one float per item yielded by
        ``run_iterations(..., per_sent=True)``: the first field of each item
        (presumably the mean GLEU of that sentence -- confirm against
        ``GLEU.run_iterations``).
    """
    sys.stderr.write('Running GLEU...\n')

    gleu_calculator = GLEU(4)  # 4 is the n-gram order given to the scorer
    gleu_calculator.load_sources(source)
    gleu_calculator.load_references(references)

    per_sentence_results = gleu_calculator.run_iterations(
        num_iterations=num_iterations,
        source=source,
        hypothesis=prediction_path,
        per_sent=True)
    # Only the first field of every per-sentence result is kept.
    return np.array([float(result[0]) for result in per_sentence_results])
Beispiel #2
0
def gleu_scores(source,
                references,
                systems,
                ngrams_len=4,
                num_iterations=500,
                debug=False):
    """Score one or more system outputs with GLEU.

    For every system, each iteration pairs every hypothesis sentence with one
    randomly chosen reference; the sufficient statistics are summed per
    iteration and turned into a corpus-level score. In addition, every
    hypothesis sentence gets a smoothed sentence-level score against each
    reference.

    Args:
        source: Either a string (passed to ``GLEU.load_sources``) or any
            other object (passed to ``GLEU.set_sources``).
        references: Non-empty sequence of references. If the first element
            is a string the sequence goes to ``GLEU.load_references``,
            otherwise to ``GLEU.set_references``.
        systems: Iterable of system outputs. A string element is opened as a
            file path; any other element is iterated directly as lines.
        ngrams_len: N-gram order handed to ``GLEU``; also determines the
            length of the statistics vector (``2 * ngrams_len + 2``).
        num_iterations: Number of random reference samplings. Forced to 1
            when there is exactly one reference.
        debug: When true, print sentence-level and overall score tables
            instead of the short per-system summary.

    Returns:
        A ``(total, per_sentence)`` tuple. ``total`` holds one
        ``get_gleu_stats`` result per system. ``per_sentence`` is a single
        flat list holding one ``get_gleu_stats`` result per hypothesis
        sentence, appended in order across all systems.
    """
    num_refs = len(references)

    # With a single reference every iteration would draw the same reference,
    # so one iteration is enough.
    if num_refs == 1:
        num_iterations = 1

    gleu_calculator = GLEU(ngrams_len)

    if isinstance(source, six.string_types):
        gleu_calculator.load_sources(source)
    else:
        gleu_calculator.set_sources(source)

    if isinstance(references[0], six.string_types):
        gleu_calculator.load_references(references)
    else:
        gleu_calculator.set_references(references)

    total = []
    per_sentence = []
    for hpath in systems:
        if isinstance(hpath, six.string_types):
            with open(hpath) as instream:
                hyp = [line.split() for line in instream]
            if not debug:
                print(os.path.basename(hpath))
        else:
            hyp = [line.split() for line in hpath]

        # One list of reference indices per iteration, each drawn with its
        # own fixed seed so runs are reproducible. A private Random instance
        # produces the same sequence as random.seed(j * 101) followed by
        # random.randint, without reseeding the process-wide generator.
        indices = []
        for j in range(num_iterations):
            rng = random.Random(j * 101)
            indices.append(
                [rng.randint(0, num_refs - 1) for _ in range(len(hyp))])

        if debug:
            print()
            print('===== Sentence-level scores =====')
            print('SID Mean Stdev 95%CI GLEU')

        # Running sum of the sufficient statistics for every iteration.
        iter_stats = [[0] * (2 * ngrams_len + 2)
                      for _ in range(num_iterations)]

        for i, h in enumerate(hyp):
            gleu_calculator.load_hypothesis_sentence(h)

            # Cache this sentence's statistics per reference so they are
            # computed at most once instead of once per iteration.
            stats_by_ref = [None] * num_refs

            for j in range(num_iterations):
                ref = indices[j][i]
                this_stats = stats_by_ref[ref]

                if this_stats is None:
                    this_stats = list(gleu_calculator.gleu_stats(i, r_ind=ref))
                    stats_by_ref[ref] = this_stats

                iter_stats[j] = [
                    sum(scores) for scores in zip(iter_stats[j], this_stats)
                ]

            # The sentence-level score compares the hypothesis with every
            # reference, so compute the statistics for any reference that
            # was never sampled. This must happen before scoring; otherwise
            # the unsampled entries reach gleu() as None.
            for r in range(num_refs):
                if stats_by_ref[r] is None:
                    stats_by_ref[r] = list(
                        gleu_calculator.gleu_stats(i, r_ind=r))

            per_sentence.append(
                get_gleu_stats([
                    gleu_calculator.gleu(stats, smooth=True)
                    for stats in stats_by_ref
                ]))
            if debug:
                # Sentence id and its statistics share one row, matching the
                # single-line table header printed above.
                print(i, ' '.join(per_sentence[-1]))

        total.append(
            get_gleu_stats(
                [gleu_calculator.gleu(stats) for stats in iter_stats]))
        if debug:
            print('\n==== Overall score =====')
            print('Mean Stdev 95%CI GLEU')
            print(' '.join(total[-1]))
        else:
            print("total", total[-1][0])
    return total, per_sentence
Beispiel #3
0
                        help='path to src sentences')
    parser.add_argument('-r',
                        '--ref',
                        nargs='*',
                        required=True,
                        help='references to use')
    parser.add_argument('-d',
                        '--debug',
                        default=False,
                        action='store_true',
                        help='print debugging messages')
    parser.add_argument('-c',
                        '--cand',
                        nargs='*',
                        required=True,
                        help='candidate(s) to score')
    args = parser.parse_args()

    gleu_calculator = GLEU(4)
    gleu_calculator.load_sources(args.src)
    num_iterations = 200
    gleu_calculator.load_references(args.ref)
    for cand in args.cand:
        print cand, [
            float(g[0]) for g in gleu_calculator.run_iterations(
                num_iterations=num_iterations,
                source=args.src,
                hypothesis=cand,
                per_sent=False)
        ][0]