Exemple #1
0
    def _precision(self, ref, out, src, n):
        """
        Compute the GLEU-specific n-gram precision for a single sentence.

        Output n-grams that also occur in the reference are rewarded, while
        output n-grams that overlap the source beyond what is already
        credited as a reference match are subtracted.

        Args:
          ref: A reference sentence
          out: An output sentence
          src: A source sentence
          n: Order of the n-grams to compare

        Returns:
          Numerator and denominator of the precision
        """
        ref_cnt = Counter(ngram_utils.sent_ngrams_list(ref, n))
        out_cnt = Counter(ngram_utils.sent_ngrams_list(out, n))
        src_cnt = Counter(ngram_utils.sent_ngrams_list(src, n))

        # Clipped overlap of the output with the reference and with the source.
        matched_ref = out_cnt & ref_cnt
        matched_src = out_cnt & src_cnt

        # Counter subtraction keeps only positive counts, so this is the part
        # of the source overlap not already credited as a reference match.
        src_only = matched_src - matched_ref

        reward = sum(matched_ref.values())
        penalty = sum(src_only.values())

        # The numerator is floored at zero, according to
        # https://github.com/cnap/gec-ranking/blob/master/scripts/gleu.py
        num = max(reward - penalty, 0)
        denom = sum(out_cnt.values())

        return num, denom
Exemple #2
0
def num_repetitions_in_sentence(sentence: Tokens,
                                adjacent: bool = True,
                                ngram_order: int = 1) -> int:
    """
    Count how often n-grams are repeated within a sentence.

    :param sentence: A list of tokens or characters as strings.
    :param adjacent: If True, an n-gram only counts as repeated when it
                     also occurs among the ``ngram_order`` n-grams
                     immediately preceding it. If False, every occurrence
                     of an n-gram after its first one counts, wherever it
                     appears in the sentence.
    :param ngram_order: Order of ngrams considered, positive integer.

    :return: Number of times an element was repeated.
    """
    ngrams = ngram_utils.sent_ngrams_list(sentence, ngram_order)

    if not adjacent:
        # Each distinct n-gram contributes its occurrences beyond the first.
        return sum(count - 1 for count in Counter(ngrams).values())

    repeats = 0
    # Sliding window over the most recent n-grams (at most ngram_order).
    window = []
    for current in ngrams:
        if current in window:
            repeats += 1

        window.append(current)
        if len(window) > ngram_order:
            # Drop the oldest n-gram so the window never exceeds its limit.
            del window[0]

    return repeats
Exemple #3
0
    def _precision(self, ref, out, n):
        """
        Compute the clipped n-gram precision of an output sentence.

        Args:
          ref: A reference sentence
          out: An output sentence
          n: Order of the n-grams to compare

        Returns:
          Numerator and denominator of the precision
        """
        out_cnt = Counter(ngram_utils.sent_ngrams_list(out, n))
        ref_cnt = Counter(ngram_utils.sent_ngrams_list(ref, n))

        # Counter intersection takes the per-n-gram minimum of both counts,
        # so each output n-gram is credited at most as often as it occurs
        # in the reference.
        num = sum((out_cnt & ref_cnt).values())

        # Floor of 1 keeps the denominator non-zero when the output yields
        # no n-grams of this order.
        denom = max(1, sum(out_cnt.values()))

        return num, denom