예제 #1
0
파일: utils.py 프로젝트: GZJAS/Squirrel
def computeBLEU(outputs, targets, corpus=False, tokenizer=None, segmenter=None):
    """Score hypotheses against references, per sentence or over the corpus.

    Args:
        outputs: Iterable of hypotheses, paired positionally with ``targets``.
        targets: Iterable of references, one per hypothesis.
        corpus: When false (the default), return one score per
            hypothesis/reference pair; when true, return a single
            corpus-level score.
        tokenizer: Optional callable applied to every hypothesis and every
            reference. When ``None`` the items are used as given
            (assumed to be already tokenized -- confirm against callers).
        segmenter: Optional callable applied to the whole list of
            hypotheses and, separately, to the whole list of references.

    Returns:
        A ``torch.Tensor`` holding one ``sentence_gleu`` score per pair when
        ``corpus`` is false, otherwise the value returned by ``corpus_bleu``.

    NOTE: despite the function name, the sentence-level path scores with
    ``sentence_gleu``; only the corpus-level path uses ``corpus_bleu``.
    """
    if tokenizer is None:
        # The default used to crash with "NoneType is not callable";
        # fall back to using the sequences unchanged.
        outputs, targets = list(outputs), list(targets)
    else:
        outputs = [tokenizer(o) for o in outputs]
        targets = [tokenizer(t) for t in targets]

    if segmenter is not None:
        outputs = segmenter(outputs)
        targets = segmenter(targets)

    if not corpus:
        # Each hypothesis is scored against a single-reference list.
        return torch.Tensor(
            [sentence_gleu([t], o) for o, t in zip(outputs, targets)])
    return corpus_bleu(
        [[t] for t in targets], list(outputs), emulate_multibleu=True)
예제 #2
0
파일: utils.py 프로젝트: GZJAS/Squirrel
def computeGroupBLEU(outputs, targets, tokenizer=None, bra=10, maxmaxlen=80):
    """Print one corpus-level BLEU score per length bucket.

    Pairs are grouped into buckets of width ``bra`` according to the
    length of the (tokenized) hypothesis; the number of buckets is derived
    from the longest reference, capped at ``maxmaxlen``. Hypotheses too
    long for any bucket are placed in the last one.

    Args:
        outputs: Iterable of hypotheses, paired positionally with ``targets``.
        targets: Iterable of references, one per hypothesis.
        tokenizer: Optional callable applied to every hypothesis and every
            reference. When ``None`` the items are used as given
            (assumed to be already tokenized -- confirm against callers).
        bra: Bucket width, in tokens.
        maxmaxlen: Upper bound on the reference length used to decide how
            many buckets to create.

    Returns:
        None. The longest reference length and then one ``corpus_bleu``
        score per bucket are written to stdout.
    """
    if tokenizer is None:
        # The default used to crash with "NoneType is not callable";
        # fall back to using the sequences unchanged.
        outputs, targets = list(outputs), list(targets)
    else:
        outputs = [tokenizer(o) for o in outputs]
        targets = [tokenizer(t) for t in targets]

    # default=0 keeps an empty target list from raising ValueError.
    maxlens = max((len(t) for t in targets), default=0)
    print(maxlens)
    maxlens = min(maxlens, maxmaxlen)
    nums = int(np.ceil(maxlens / bra))
    if nums <= 0:
        # No buckets can be formed (no references, or all of them empty);
        # indexing the empty bucket list below would raise IndexError.
        return

    outputs_buckets = [[] for _ in range(nums)]
    targets_buckets = [[] for _ in range(nums)]
    for o, t in zip(outputs, targets):
        # Bucket by hypothesis length; overflow lands in the last bucket.
        idx = len(o) // bra
        if idx >= nums:
            idx = nums - 1
        outputs_buckets[idx].append(o)
        targets_buckets[idx].append(t)

    for k in range(nums):
        print(corpus_bleu(
            [[t] for t in targets_buckets[k]],
            list(outputs_buckets[k]),
            emulate_multibleu=True))