Ejemplo n.º 1
0
def compute_bleu(net, word_dict, index_dict, tokens, initial=None, IM=None):
    """
    Return BLEU scores for reference tokens.

    For each reference caption, a candidate caption of the same length is
    sampled from net and scored against that single reference.

    Parameters:
        net: model handed to ``sample``; ``net.context`` is the number of
            leading tokens stripped from every reference before scoring.
        word_dict, index_dict: passed through to ``sample`` unchanged.
        tokens: sequence of reference token sequences.
        initial: optional initial state for ``sample``; a fresh deep copy is
            handed over for every reference.
        IM: optional per-reference inputs, indexed in step with ``tokens``
            (presumably image features -- confirm against ``sample``).

    Returns:
        Array of shape (len(tokens), 3); row i holds the scores computed
        with n = 1, 2 and 3 for reference i.
    """
    bleu_scores = np.zeros((len(tokens), 3))
    for i, ref in enumerate(tokens):
        # Identity tests on purpose: `!= None` applied to a numpy array is an
        # elementwise comparison whose result has no single truth value.
        init = copy.deepcopy(initial) if initial is not None else None

        # Strip the leading context tokens and the final token
        # (presumably an end-of-sentence marker -- confirm with the caller).
        ref = ref[net.context:][:-1]

        if IM is not None:
            can = sample(net,
                         word_dict,
                         index_dict,
                         len(ref),
                         IM[i],
                         initial=init)
        else:
            can = sample(net, word_dict, index_dict, len(ref), initial=init)

        # Compute bleu using n = (1,2,3)
        bleu_scores[i] = [
            bleu.score_cooked(
                [bleu.cook_test(can, bleu.cook_refs([ref], n=n), n=n)], n=n)
            for n in (1, 2, 3)
        ]

    return bleu_scores
Ejemplo n.º 2
0
def main():
    """
    Score n-best hypotheses against references with BLEU, twice.

    Reads ``test_scorer_data/reference.txt`` (one reference per line) and
    ``test_scorer_data/nbest.out`` (lines of the form ``index ||| text ...``).
    The first pass scores the first hypothesis of every sentence and prints
    its correct/guess counts and reference length. The second pass swaps in
    alternative hypotheses for sentences 7 and 1 and scores again. Both
    corpus scores are printed at the end.
    """
    sys.path.append("../scripts/training/cmert-0.5")
    import bleu
    data_dir = "test_scorer_data"
    nbest_file = os.path.join(data_dir, "nbest.out")
    ref_file = os.path.join(data_dir, "reference.txt")
    bleu.preserve_case = False
    bleu.eff_ref_len = "shortest"
    bleu.nonorm = 0

    # One cooked reference set per line; `with` closes the file even if
    # cooking raises.
    with open(ref_file) as ref_fh:
        cookedrefs = [bleu.cook_refs([ref]) for ref in ref_fh]

    # Group hypotheses by sentence: a new group starts whenever the leading
    # index differs from the previous line's index.
    tests = []
    last_index = -1
    with open(nbest_file) as nbest_fh:
        for line in nbest_fh:
            fields = line.split("||| ")
            current_index = int(fields[0])
            text = fields[1]
            if current_index != last_index:
                tests.append([])
                last_index = current_index
            tests[-1].append(text)

    #  score with first best
    cookedtests = []
    for i, hypotheses in enumerate(tests):
        cookedtest = bleu.cook_test(hypotheses[0], cookedrefs[i])
        stats = " ".join(
            "%d %d" % (c, g)
            for (c, g) in zip(cookedtest['correct'], cookedtest['guess']))
        # Single-argument print: identical output under the Python 2 print
        # statement and the Python 3 print function.
        print(" %s %d" % (stats, cookedtest['reflen']))
        cookedtests.append(cookedtest)
    bleu1 = bleu.score_cooked(cookedtests)

    # vary, and score again
    cookedtests = []
    for i, hypotheses in enumerate(tests):
        if i == 7:
            sentence = hypotheses[8]
        elif i == 1:
            # This branch used to assign to a misspelled `sentences`, so the
            # alternative hypothesis for sentence 1 was never scored.
            sentence = hypotheses[2]
        else:
            sentence = hypotheses[0]
        cookedtests.append(bleu.cook_test(sentence, cookedrefs[i]))
    bleu2 = bleu.score_cooked(cookedtests)

    # Two spaces after the colon reproduce what `print "Bleus: ", a, b` emits.
    print("Bleus:  %s %s" % (bleu1, bleu2))
Ejemplo n.º 3
0
def main():
    """
    Run the BLEU scorer over the n-best test data and print two scores.

    Input files live in ``test_scorer_data``: ``reference.txt`` supplies one
    reference per line, and ``nbest.out`` supplies hypothesis lines whose
    first ``||| ``-separated field is the sentence index and whose second
    field is the hypothesis text.

    Output: one line of correct/guess counts plus reference length for each
    first-best hypothesis, then a final line with the first-best corpus score
    and the score after substituting other hypotheses for sentences 7 and 1.
    """
    sys.path.append("../scripts/training/cmert-0.5")
    import bleu
    data_dir = "test_scorer_data"
    nbest_file = os.path.join(data_dir, "nbest.out")
    ref_file = os.path.join(data_dir, "reference.txt")
    bleu.preserve_case = False
    bleu.eff_ref_len = "shortest"
    bleu.nonorm = 0

    # Context managers guarantee the handles are released on any error.
    with open(ref_file) as ref_fh:
        cookedrefs = [bleu.cook_refs([ref]) for ref in ref_fh]

    # Consecutive lines sharing a sentence index form one n-best list.
    tests = []
    previous = -1
    with open(nbest_file) as nbest_fh:
        for line in nbest_fh:
            fields = line.split("||| ")
            current = int(fields[0])
            text = fields[1]
            if current != previous:
                tests.append([])
                previous = current
            tests[-1].append(text)

    #  score with first best
    cookedtests = []
    for i, nbest in enumerate(tests):
        cookedtest = bleu.cook_test(nbest[0], cookedrefs[i])
        pairs = zip(cookedtest['correct'], cookedtest['guess'])
        stats = " ".join("%d %d" % (c, g) for (c, g) in pairs)
        # Parenthesised single expression prints the same text under both
        # Python 2 and Python 3.
        print(" %s %d" % (stats, cookedtest['reflen']))
        cookedtests.append(cookedtest)
    bleu1 = bleu.score_cooked(cookedtests)

    # vary, and score again
    cookedtests = []
    for i, nbest in enumerate(tests):
        sentence = nbest[0]
        if i == 7:
            sentence = nbest[8]
        elif i == 1:
            # The original bound the misspelled name `sentences` here, which
            # left the first-best hypothesis in place for sentence 1.
            sentence = nbest[2]
        cookedtests.append(bleu.cook_test(sentence, cookedrefs[i]))
    bleu2 = bleu.score_cooked(cookedtests)

    # The doubled space matches the separator the comma-style print inserted.
    print("Bleus:  %s %s" % (bleu1, bleu2))
Ejemplo n.º 4
0
def batch_bleu(cans, refs):
    """
    Score every candidate against its own list of references.

    cans : [ 'XXX', 'XXX', ... ]
    refs : [ ['XXX', 'XXX', ... ], ['XXX', 'XXX', ... ], ... ]

    refs[i] holds the references for cans[i]. The result has one row per
    candidate with the scores computed for n = 1, 2 and 3, in that order.
    """
    orders = (1, 2, 3)
    scores = np.zeros((len(cans), len(orders)))
    for row, candidate in enumerate(cans):
        per_order = []
        for n in orders:
            cooked_refs = bleu.cook_refs(refs[row], n=n)
            cooked_can = bleu.cook_test(candidate, cooked_refs, n=n)
            per_order.append(bleu.score_cooked([cooked_can], n=n))
        scores[row] = per_order
    return scores
def compute_bleu(net, word_dict, index_dict, tokens, initial=None, IM=None):
    """
    Return BLEU scores for reference tokens.

    For each reference caption, a candidate caption is sampled from net
    (with as many tokens as the trimmed reference) and scored against it.

    Parameters:
        net: model passed to ``sample``; ``net.context`` leading tokens are
            removed from each reference before scoring.
        word_dict, index_dict: forwarded to ``sample`` untouched.
        tokens: sequence of reference token sequences.
        initial: optional initial state; deep-copied once per reference so
            ``sample`` never receives the caller's object itself.
        IM: optional sequence indexed in parallel with ``tokens``
            (presumably image features -- confirm against ``sample``).

    Returns:
        Array of shape (len(tokens), 3) whose columns are the scores for
        n = 1, 2 and 3.
    """
    bleu_scores = np.zeros((len(tokens), 3))
    for i, ref in enumerate(tokens):
        # `is not None` rather than `!= None`: comparing a numpy array with
        # `!=` is elementwise and cannot be used as an `if` condition.
        if initial is not None:
            init = copy.deepcopy(initial)
        else:
            init = None

        # Remove the context prefix and the last token before scoring.
        ref = ref[net.context:][:-1]

        if IM is not None:
            can = sample(net, word_dict, index_dict, len(ref), IM[i], initial=init)
        else:
            can = sample(net, word_dict, index_dict, len(ref), initial=init)

        # Compute bleu using n = (1,2,3)
        row = []
        for n in (1, 2, 3):
            cooked = bleu.cook_test(can, bleu.cook_refs([ref], n=n), n=n)
            row.append(bleu.score_cooked([cooked], n=n))
        bleu_scores[i] = row

    return bleu_scores
Ejemplo n.º 6
0
            bleu.preserve_case = True
        elif opt == "-t":
            bleu.nist_tokenize = False
        elif opt == "-p":
            bleu.clip_len = True
        elif opt == "-v":
            verbose = True

    test1 = []
    test2 = []
    for lines in itertools.izip(*[file(filename) for filename in args]):
        cookedrefs = bleu.cook_refs(lines[2:])
        test1.append(bleu.cook_test(lines[0], cookedrefs))
        test2.append(bleu.cook_test(lines[1], cookedrefs))

    score1 = bleu.score_cooked(test1)
    print "System 1: %f" % score1
    print "System 2: %f" % bleu.score_cooked(test2)

    better = worse = 0
    fake = test1[:]
    for i in xrange(len(fake)):
        fake[i] = test2[i]

        fake_score = bleu.score_cooked(fake)
        if fake_score > score1:
            better += 1
        elif fake_score < score1:
            worse += 1

        if verbose:
Ejemplo n.º 7
0
            bleu.preserve_case = True
        elif opt == "-t":
            bleu.nist_tokenize = False
        elif opt == "-p":
            bleu.clip_len = True
        elif opt == "-v":
            verbose = True

    test1 = []
    test2 = []
    for lines in itertools.izip(*[file(filename) for filename in args]):
        cookedrefs = bleu.cook_refs(lines[2:])
        test1.append(bleu.cook_test(lines[0], cookedrefs))
        test2.append(bleu.cook_test(lines[1], cookedrefs))

    print "System 1: %f" % bleu.score_cooked(test1)
    print "System 2: %f" % bleu.score_cooked(test2)

    better1 = better2 = 0

    n = 1000

    diffs = []
    for i in xrange(n):
        fake1 = []
        fake2 = []
        for j in xrange(len(test1)):
            r = random.randrange(len(test1))
            fake1.append(test1[r])
            fake2.append(test2[r])