import numpy as np
from fast_bleu import BLEU


def bleu_(generate_corpus, reference_corpus, n_grams, get_avg=False):
    r"""Calculate the BLEU metrics of the generated corpus against the reference corpus.

    Args:
        generate_corpus (List[List[str]]): the generated corpus
        reference_corpus (List[List[str]]): the reference corpus
        n_grams (List): the n-gram metrics to be calculated
        get_avg (bool, optional): whether to also calculate the average BLEU score,
            default: False

    Returns:
        List or (List, float): the BLEU results and optional average BLEU score
    """
    # Build one weight tuple per requested n-gram order, e.g. 2 -> (0.0, 1.0).
    weight = [0] * max(n_grams)
    weights = {}
    for n_gram in n_grams:
        weight[n_gram - 1] = 1.0
        weights[n_gram] = tuple(weight)
        weight[n_gram - 1] = 0.0
    if get_avg:
        # Uniform weights over 1- to 4-grams give the standard averaged BLEU.
        weights['avg-bleu'] = tuple([0.25] * 4)
    bleu = BLEU(reference_corpus, weights)
    scores = bleu.get_score(generate_corpus)
    results = []
    for n_gram in n_grams:
        score = np.array(scores[n_gram])
        results.append(score.mean())
    if get_avg:
        avg_bleu = np.array(scores['avg-bleu']).mean()
        return results, avg_bleu
    return results
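# A minimal usage sketch for bleu_ (the toy corpora below are illustrative
# placeholders, not data from the original source; fast_bleu must be installed):
generated = [['the', 'cat', 'sat'], ['a', 'dog', 'ran']]
references = [['the', 'cat', 'sat', 'down'], ['the', 'dog', 'ran', 'home']]
bleu_results = bleu_(generated, references, n_grams=[1, 2])
bleu_results, avg_bleu = bleu_(generated, references, n_grams=[1, 2], get_avg=True)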
def bleu(cond, uncond, weight):
    # Score the conditional samples against the unconditional corpus;
    # `weight` is a fast_bleu-style dict mapping a key to an n-gram weight tuple.
    bleu = BLEU(uncond, weight)
    score = bleu.get_score(cond)
    # Average the per-sentence scores for each key.
    mean_score = {}
    for k, v in score.items():
        mean_score[k] = sum(v) / float(len(v))
    return score, mean_score
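# Usage sketch for bleu (hedged: toy inputs, and the 'bleu-2' key with its
# weight tuple is an illustrative choice, not fixed by the function):
cond_samples = [['the', 'cat', 'sat'], ['a', 'dog', 'ran']]
uncond_samples = [['the', 'cat', 'sat', 'down'], ['the', 'dog', 'ran']]
per_sentence, means = bleu(cond_samples, uncond_samples, {'bleu-2': (0.5, 0.5)})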
def _calc_fast_bleu(self, generate_corpus, reference_corpus):
    r"""Calculate the BLEU metrics of the generated corpus against the reference corpus.

    Args:
        generate_corpus (List[List[str]]): the generated corpus
        reference_corpus (List[List[str]]): the reference corpus

    Returns:
        dict: the per-sentence BLEU scores, keyed by the entries of ``self.weights``
    """
    bleu = BLEU(reference_corpus, self.weights)
    scores = bleu.get_score(generate_corpus)
    return scores
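# Hedged sketch of how _calc_fast_bleu is driven: its host object must carry
# a `weights` dict in the fast_bleu format. The stand-in class below is
# hypothetical, not the original enclosing class.
class _WeightsHost:
    def __init__(self, n_grams=(1, 2)):
        weight = [0] * max(n_grams)
        self.weights = {}
        for n in n_grams:
            weight[n - 1] = 1.0
            self.weights[n] = tuple(weight)
            weight[n - 1] = 0.0


# Reusing the toy corpora from the bleu_ sketch above.
scores = _calc_fast_bleu(_WeightsHost(), generated, references)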
class ReverseBleu(BaseMetric):
    def __init__(self, ref_samples, hyp_samples, min_n=2, max_n=5,
                 parser: ReversibleField = None, parse=True):
        super().__init__()
        from fast_bleu import BLEU as FBLEU
        assert max_n >= min_n
        assert min_n >= 1
        if parse:
            ref_samples = parser.reverse(ref_samples)
            hyp_samples = parser.reverse(hyp_samples)
        self.ref_tokens = [parser.tokenize(r) for r in ref_samples]
        self.hyp_tokens = [parser.tokenize(r) for r in hyp_samples]
        self.parser = parser
        # Uniform weights for each n-gram order in [min_n, max_n].
        w = {i: np.ones(i) / i for i in range(min_n, max_n + 1)}
        # Reverse BLEU: the hypotheses act as the reference corpus, and the
        # references are scored against them in get_score().
        self.bleu = FBLEU(self.hyp_tokens, w, verbose=True)
        print('LOG: ReverseBLEU init done!')

    def get_score(self):
        print('LOG: calculating ReverseBLEU!')
        scores = self.bleu.get_score(self.ref_tokens)
        result = ({run: np.mean(scores[run]) for run in scores.keys()}, scores)
        print('LOG: done!')
        return result
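# Hedged usage sketch: ReverseBleu needs a parser exposing reverse() and
# tokenize(). The whitespace stub below stands in for the real
# ReversibleField, and BaseMetric is assumed to come from the surrounding project.
class _StubParser:
    def reverse(self, samples):
        return samples  # the toy samples are already plain strings

    def tokenize(self, text):
        return text.split()


rb = ReverseBleu(ref_samples=['the cat sat', 'a dog ran'],
                 hyp_samples=['the cat sat down', 'the dog ran'],
                 min_n=2, max_n=3, parser=_StubParser(), parse=False)
means, per_sentence = rb.get_score()  # mean score per n-gram order, plus raw lists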
class Bleu(Metric):
    def __init__(self, references, parser: ReversibleField = None, parse=True):
        super().__init__('bleu')
        if parse:
            references = parser.reverse(references)
        ref_tokens = [parser.tokenize(r) for r in references]
        self.parser = parser
        from fast_bleu import BLEU as FBLEU
        w = {i: np.ones(i) / i for i in range(2, 6)}
        self.bleu = FBLEU(ref_tokens, w)
        print('bleu instance created!')

    def eval(self, samples, parse=True):
        if parse:
            samples = self.parser.reverse(samples)
        samples = [self.parser.tokenize(r) for r in samples]
        scores = self.bleu.get_score(samples)
        return {k: np.mean(scores[k]) for k in scores.keys()}
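# Hedged usage sketch for Bleu, reusing the _StubParser stand-in from the
# ReverseBleu sketch above (Metric, like BaseMetric, is assumed to come from
# the surrounding project):
metric = Bleu(references=['the cat sat down', 'the dog ran'],
              parser=_StubParser(), parse=False)
mean_scores = metric.eval(['the cat sat', 'a dog ran'], parse=False)  # keys 2..5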
def __init__(self, samples, min_n=2, max_n=5,
             parser: ReversibleField = None, parse=True):
    super().__init__()
    from fast_bleu import BLEU as FBLEU
    assert max_n >= min_n
    assert min_n >= 1
    if parse:
        samples = parser.reverse(samples)
    ref_tokens = [parser.tokenize(r) for r in samples]
    self.parser = parser
    w = {i: np.ones(i) / i for i in range(min_n, max_n + 1)}
    self.bleu = FBLEU(ref_tokens, w, verbose=True)
    print('LOG: BLEU init done!')