def _precision(self, ref, out, src, n):
    """
    Compute the GLEU-specific n-gram precision for a single sentence.

    The numerator rewards output n-grams that also occur in the reference
    and penalizes output n-grams that occur in the source beyond what the
    reference accounts for. All overlaps are clipped multiset counts.

    Args:
      ref: A reference sentence
      out: An output sentence
      src: A source sentence
      n: The n-gram order handed to ``ngram_utils.sent_ngrams_list``
    Returns:
      Numerator and denominator of the precision. The numerator is floored
      at 0; the denominator is the total number of output n-grams (and is
      therefore 0 when the output yields no n-grams).
    """
    # Build the n-gram multisets in the order: reference, output, source.
    ref_counts, out_counts, src_counts = (
        Counter(ngram_utils.sent_ngrams_list(sent, n)) for sent in (ref, out, src)
    )

    matched_ref = out_counts & ref_counts
    matched_src = out_counts & src_counts

    reward = sum(matched_ref.values())
    # Counter subtraction drops non-positive entries, so only the source
    # matches in excess of the reference matches are penalized.
    penalty = sum((matched_src - matched_ref).values())

    # According to https://github.com/cnap/gec-ranking/blob/master/scripts/gleu.py
    numerator = max(reward - penalty, 0)
    denominator = sum(out_counts.values())
    return numerator, denominator
def num_repetitions_in_sentence(sentence: Tokens, adjacent: bool = True, ngram_order: int = 1) -> int:
    """
    Count how often n-grams are repeated within a sentence.

    :param sentence: A list of tokens or characters as strings.
    :param adjacent: If True, an n-gram only counts as repeated when it also
                     appears among the ``ngram_order`` n-grams immediately
                     preceding it. If False, every occurrence of an n-gram
                     beyond its first counts, wherever it appears.
    :param ngram_order: Order of ngrams considered, positive integer.
    :return: Number of times an element was repeated.
    """
    ngrams = ngram_utils.sent_ngrams_list(sentence, ngram_order)

    if not adjacent:
        # Each distinct n-gram contributes its number of extra occurrences.
        return sum(occurrences - 1 for occurrences in Counter(ngrams).values())

    repeats = 0
    window = []  # the most recent n-grams, at most ngram_order of them
    for current in ngrams:
        if current in window:
            repeats += 1
        window.append(current)
        if len(window) > ngram_order:
            # Slide the window forward by dropping its oldest entry.
            del window[0]
    return repeats
def _precision(self, ref, out, n):
    """
    Compute the clipped n-gram precision of an output against a reference.

    Args:
      ref: A reference sentence
      out: An output sentence
      n: The n-gram order handed to ``ngram_utils.sent_ngrams_list``
    Returns:
      Numerator and denominator of the precision. The numerator is the
      number of output n-grams matched in the reference, each n-gram
      counted at most as often as it occurs in the reference. The
      denominator is the number of output n-grams, but never less than 1.
    """
    out_counts = Counter(ngram_utils.sent_ngrams_list(out, n))
    ref_counts = Counter(ngram_utils.sent_ngrams_list(ref, n))

    # Multiset intersection keeps min(output count, reference count) per n-gram.
    clipped_matches = out_counts & ref_counts

    numerator = sum(clipped_matches.values())
    # Floor at 1 so an output without n-grams still gives a usable denominator.
    denominator = max(1, sum(out_counts.values()))
    return numerator, denominator