Example #1
0
def _get_sent_cider(
        hypothesis: List[str],
        references: List[List[str]],
        extra_args: Optional[Dict[str, str]] = None) -> List[float]:
    """Score each hypothesis with a ``_CIDErScorer``.

    Args:
        hypothesis: Hypothesis sentences.
        references: Reference sentences, forwarded unchanged to the scorer.
        extra_args: Optional settings. ``'n_workers'`` and ``'verbose'`` are
            looked up through ``get_optional_dict`` with fallbacks ``1`` and
            ``False`` respectively.

    Returns:
        Whatever ``_CIDErScorer.get_sent_scores`` returns for the inputs
        (annotated as one float per sentence).
    """
    scorer_kwargs = {
        'n_workers': get_optional_dict(extra_args, 'n_workers', 1),
        'verbose': get_optional_dict(extra_args, 'verbose', False),
    }
    scorer = _CIDErScorer(**scorer_kwargs)
    return scorer.get_sent_scores(hypothesis, references)
Example #2
0
def _get_sent_bleu(hypothesis: List[str],
                   references: List[List[str]],
                   extra_args: Optional[Dict[str, str]] = None) -> List[float]:
    """Compute one BLEU score per hypothesis sentence via ``sb.corpus_bleu``.

    Args:
        hypothesis: Hypothesis sentences.
        references: Reference streams; ``references[k][i]`` is the k-th
            reference for ``hypothesis[i]``.
        extra_args: Optional settings; ``'bleu_tokenizer'`` is looked up
            through ``get_optional_dict`` with fallback ``'none'``.

    Returns:
        The ``.score`` of each single-sentence ``corpus_bleu`` call, in
        hypothesis order.
    """
    tokenizer = get_optional_dict(extra_args, 'bleu_tokenizer', 'none')
    # Put the hypothesis stream first so zipping yields (hyp, ref_1, ..., ref_k).
    streams = [hypothesis] + references
    sentence_scores = []
    for hyp, *sent_refs in zip(*streams):
        # Each reference becomes its own one-sentence reference stream.
        bleu = sb.corpus_bleu(
            [hyp],
            [[ref] for ref in sent_refs],
            smooth_method='floor',
            use_effective_order=True,
            force=True,
            tokenize=tokenizer,
        )
        sentence_scores.append(bleu.score)
    return sentence_scores
Example #3
0
def _get_sent_bleu(
        hypothesis: List[str], references: List[List[str]],
        extra_args: Optional[Dict[str, str]] = None, score='score'
) -> List[float]:
    tokenizer = get_optional_dict(extra_args, 'tokenizer', 'none')
    data = [hypothesis] + references
    args = get_default_args(smooth_method='floor', tokenize=tokenizer,
                            num_refs=len(references))
    scorer = BLEU(args)
    scores = [
        scorer.corpus_score([h], [[rr] for rr in r], use_effective_order=True)
        for h, *r in zip(*data)
    ]
    proj = {'score': lambda s: s.score, 'bp': lambda s: s.bp}.get(score)
    return [proj(s) for s in scores]
Example #4
0
 def score_corpus_multiprocess(
         self, hypothesis: List[str], references: List[List[str]],
         score='score'
 ) -> float:
     """Compute a corpus-level BLEU statistic, optionally across processes.

     With ``self.n_workers == 1`` the corpus is scored in a single
     ``corpus_score`` call. Otherwise the corpus is split with
     ``self._batch``, each batch is scored in a worker process, and the
     per-batch lengths and n-gram counts are summed and passed to
     ``compute_bleu``.

     Args:
         hypothesis: Hypothesis sentences.
         references: Reference streams for the hypotheses.
         score: Which attribute of the corpus result to return:
             ``'score'`` or ``'bp'``.

     Returns:
         The selected attribute of the corpus-level result.

     Raises:
         ValueError: If ``score`` is not ``'score'`` or ``'bp'``.
     """
     projections = {'score': lambda s: s.score, 'bp': lambda s: s.bp}
     # Validate before doing any work: an unknown key used to surface only
     # after the whole corpus had been scored, as an opaque TypeError.
     if score not in projections:
         raise ValueError(
             f"Unsupported score {score!r}; "
             f"expected one of {sorted(projections)}"
         )
     proj = projections[score]

     tokenizer = get_optional_dict(self.extra_args, 'tokenizer', 'none')
     args = get_default_args(tokenize=tokenizer, num_refs=len(references))
     scorer = BLEU(args)
     if self.n_workers == 1:
         corpus_score = scorer.corpus_score(
             hypothesis, references, use_effective_order=False
         )
     else:
         batches = list(
             self._batch(hypothesis, references, n_batches=self.n_workers)
         )
         ref_len, sys_len = 0, 0
         correct = [0] * BLEU.NGRAM_ORDER
         total = [0] * BLEU.NGRAM_ORDER
         with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
             futures = [
                 executor.submit(
                     scorer.corpus_score, b[0], b[1],
                     use_effective_order=False
                 )
                 for b in batches
             ]
             progress = as_completed(futures)
             if self.verbose:
                 progress = tqdm(progress)
             # Summation is order-independent, so completion order is fine.
             for future in progress:
                 s = future.result()
                 ref_len += s.ref_len
                 sys_len += s.sys_len
                 for n in range(BLEU.NGRAM_ORDER):
                     correct[n] += s.counts[n]
                     total[n] += s.totals[n]
             # NOTE(review): smoothing is fixed to 'exp' here, while the
             # single-worker path uses whatever get_default_args configured;
             # confirm the two agree.
             corpus_score = scorer.compute_bleu(
                 correct, total, sys_len, ref_len, smooth_method='exp'
             )
     return proj(corpus_score)
Example #5
0
 def score_corpus_multiprocess(self, hypothesis: List[str],
                               references: List[List[str]]) -> float:
     """Compute the corpus-level BLEU score, optionally across processes.

     With ``self.n_workers == 1`` this is a single ``sb.corpus_bleu`` call.
     Otherwise the corpus is split with ``self._batch``, each batch is
     scored in a worker process, and the summed lengths and n-gram counts
     are combined by ``sb.compute_bleu``.

     Args:
         hypothesis: Hypothesis sentences.
         references: Reference streams for the hypotheses.

     Returns:
         The ``.score`` attribute of the corpus-level result.
     """
     tokenizer = get_optional_dict(self.extra_args, 'bleu_tokenizer',
                                   'none')

     # Single worker: score the whole corpus directly, no pool needed.
     if self.n_workers == 1:
         whole = sb.corpus_bleu(hypothesis, references,
                                force=True, tokenize=tokenizer)
         return whole.score

     chunks = list(
         self._batch(hypothesis, references, n_batches=self.n_workers))
     total_ref_len = 0
     total_sys_len = 0
     matched = [0] * sb.NGRAM_ORDER
     possible = [0] * sb.NGRAM_ORDER
     with ProcessPoolExecutor(max_workers=self.n_workers) as pool:
         pending = []
         for chunk in chunks:
             pending.append(
                 pool.submit(sb.corpus_bleu, chunk[0], chunk[1],
                             force=True, tokenize=tokenizer))
         finished = as_completed(pending)
         if self.verbose:
             finished = tqdm(finished)
         # Sum per-batch statistics; summation does not depend on the
         # order in which batches complete.
         for done in finished:
             partial = done.result()
             total_ref_len += partial.ref_len
             total_sys_len += partial.sys_len
             for order in range(sb.NGRAM_ORDER):
                 matched[order] += partial.counts[order]
                 possible[order] += partial.totals[order]
         merged = sb.compute_bleu(matched, possible,
                                  total_sys_len, total_ref_len,
                                  smooth_method='exp')
     return merged.score