def computeBLEU(outputs, targets, corpus=False, tokenizer=None, segmenter=None):
    """Score hypotheses against references, per sentence or over the corpus.

    Args:
        outputs: Iterable of hypotheses, paired with ``targets`` by position.
        targets: Iterable of references; each hypothesis is scored against
            exactly one reference.
        corpus: When false (the default), return one ``sentence_gleu`` score
            per (output, target) pair. When true, return a single
            ``corpus_bleu`` score over all pairs.
        tokenizer: Optional callable applied to every output and target.
            When ``None`` the items are used as given, i.e. they are assumed
            to be tokenized already.
        segmenter: Optional callable applied to the whole list of outputs
            and to the whole list of targets after tokenization.

    Returns:
        A ``torch.Tensor`` holding one score per pair when ``corpus`` is
        false; otherwise the value returned by ``corpus_bleu``.
    """
    # NOTE(review): sentence_gleu / corpus_bleu are presumably the NLTK
    # implementations imported elsewhere in this file - confirm.
    if tokenizer is None:
        # The default used to crash with "'NoneType' object is not callable";
        # treat the inputs as pre-tokenized instead, mirroring how a missing
        # segmenter is handled below.
        outputs, targets = list(outputs), list(targets)
    else:
        outputs = [tokenizer(o) for o in outputs]
        targets = [tokenizer(t) for t in targets]

    if segmenter is not None:
        outputs = segmenter(outputs)
        targets = segmenter(targets)

    if not corpus:
        # zip() stops at the shorter sequence, so unmatched trailing items
        # are not scored.
        return torch.Tensor(
            [sentence_gleu([t], o) for o, t in zip(outputs, targets)]
        )

    # Each hypothesis has a single reference, hence the one-element lists.
    return corpus_bleu(
        [[t] for t in targets], list(outputs), emulate_multibleu=True
    )
def computeGroupBLEU(outputs, targets, tokenizer=None, bra=10, maxmaxlen=80):
    """Print corpus BLEU separately for groups of outputs bucketed by length.

    The number of buckets is derived from the longest target (capped at
    ``maxmaxlen``) divided by ``bra``. Each (output, target) pair is placed
    in the bucket ``len(output) // bra``; pairs whose output is too long for
    any bucket go into the last one. The longest target length is printed
    first, followed by one ``corpus_bleu`` score per bucket in order.

    Args:
        outputs: Iterable of hypotheses, paired with ``targets`` by position.
        targets: Iterable of references, one per hypothesis.
        tokenizer: Optional callable applied to every output and target.
            When ``None`` the items are used as given, i.e. they are assumed
            to be tokenized already.
        bra: Width of a length bucket.
        maxmaxlen: Upper bound on the target length used to size the buckets.

    Returns:
        None. Results are written to standard output.
    """
    if tokenizer is None:
        # The default used to crash with "'NoneType' object is not callable";
        # treat the inputs as pre-tokenized instead.
        outputs, targets = list(outputs), list(targets)
    else:
        outputs = [tokenizer(o) for o in outputs]
        targets = [tokenizer(t) for t in targets]

    # max() of an empty list raises ValueError; an empty corpus has length 0.
    maxlens = max([len(t) for t in targets]) if targets else 0
    print(maxlens)
    maxlens = min(maxlens, maxmaxlen)
    nums = int(np.ceil(maxlens / bra))

    pairs = list(zip(outputs, targets))
    if pairs and nums == 0:
        # Every target is empty: keep one bucket so the pairs have somewhere
        # to go instead of raising IndexError on an empty bucket list.
        nums = 1

    outputs_buckets = [[] for _ in range(nums)]
    targets_buckets = [[] for _ in range(nums)]
    for o, t in pairs:
        # Outputs longer than the last bucket's range are put in the last bucket.
        idx = min(len(o) // bra, nums - 1)
        outputs_buckets[idx].append(o)
        targets_buckets[idx].append(t)

    for hyps, refs in zip(outputs_buckets, targets_buckets):
        # Each hypothesis has a single reference, hence the one-element lists.
        print(corpus_bleu([[t] for t in refs], hyps, emulate_multibleu=True))