Exemple #1
0
    def modified_precision(candidate, references, n):
        """Clipped (modified) n-gram precision of *candidate*.

        Each candidate n-gram count is clipped at the largest count the
        n-gram has in any single reference before computing precision, so
        repeating a common word (e.g. "the the the ...") cannot inflate
        the score.  Returns 0 when the candidate yields no n-grams of
        order ``n``.
        """
        cand_counts = Counter(ngrams(candidate, n))
        if not cand_counts:
            return 0

        # Highest count of each candidate n-gram in any single reference.
        best = {}
        for ref in references:
            ref_counts = Counter(ngrams(ref, n))
            for gram in cand_counts:
                best[gram] = max(best.get(gram, 0), ref_counts[gram])

        # Sum of clipped counts over total candidate n-gram count.
        clipped = sum(min(count, best[gram])
                      for gram, count in cand_counts.items())
        return clipped / sum(cand_counts.values())
Exemple #2
0
def calculate_bleu_sentence(target_sentence, ref_sentence, n, fpr=1.0):
    """Sentence-level BLEU-style score: mean F-measure over orders 1..n.

    For each n-gram order, candidate counts are clipped at their frequency
    in the reference, then precision, recall and a weighted F-measure are
    computed; the final score is the mean of the per-order F-scores.

    :param target_sentence: candidate sentence as a list of words
    :param ref_sentence: reference sentence as a list of words
    :param n: maximum n-gram order to evaluate
    :param fpr: precision/recall weighting factor in the F-measure
    :return: mean of the F-scores for orders 1..n
    """
    fscores = []

    for order in range(1, n + 1):  # fix: range, not Python-2-only xrange
        # Fix: compare against the *current* order, not the maximum n, so
        # lower orders are still scored for short sentences.
        if order > len(target_sentence) or order > len(ref_sentence):
            fscores.append(0.0)
            continue

        counts = Counter(ngrams(target_sentence, order))
        ref_counts = Counter(ngrams(ref_sentence, order))
        # Clip each candidate n-gram count at its reference frequency
        # (with a single reference, the clip ceiling is just ref_counts).
        acc_num = sum(min(count, ref_counts[gram])
                      for gram, count in counts.items())

        if acc_num == 0:
            fscores.append(0.0)
        else:
            pre = acc_num / sum(counts.values())
            rec = acc_num / sum(ref_counts.values())
            fscores.append(((fpr + 1) * pre * rec) / (rec + fpr * pre))

    # The original built cumulative averages B_s[i] = mean(fscores[:i+1])
    # and returned B_s[n-1], which is simply the mean of all n F-scores.
    return math.fsum(fscores) / n
Exemple #3
0
    def modified_precision(candidate, reference, n):
        """Clipped n-gram precision of *candidate* against one *reference*.

        Returns 0 when the candidate produces no n-grams of order ``n``.
        """
        cand = Counter(ngrams(candidate, n))
        if not cand:
            return 0

        ref = Counter(ngrams(reference, n))
        # With a single reference, the clip ceiling for each n-gram is
        # simply its count in that reference (max over one value).
        clipped = sum(min(count, ref[gram]) for gram, count in cand.items())
        return clipped / sum(cand.values())
Exemple #4
0
 def print_testtrain_stats():
     """Print ranked template statistics for the test run (with train columns).

     NOTE(review): this is a nested helper lifted out of its enclosing
     function -- it reads ``test_stats``, ``train_stats``, ``tids``,
     ``template_counts``, ``weighted_traincounts``, ``tottrainscores``,
     ``det_tplsort`` and ``Template`` from the enclosing scope.
     """
     # Per-rule scores measured on the test set (parallel to tids).
     testscores = test_stats['rulescores']
     print("TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
         len(template_counts),
         len(tids)),
     )
     print("TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
           "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats))
     print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
           "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
     # Total test score contributed by each template (weighted ranking key).
     weighted_testcounts = Counter()
     for (tid, score) in zip(tids, testscores):
         weighted_testcounts[tid] += score
     tottestscores = sum(testscores)
     head = "#ID | Score (test) | Score (train) |  #Rules     | Template"
     print(head, "\n", "-" * len(head), sep="")
     # det_tplsort gives a deterministic ordering (ties broken by repr).
     test_tplscores = sorted(weighted_testcounts.items(), key=det_tplsort, reverse=True)
     for (tid, testscore) in test_tplscores:
         s = "{0:s} |{1:5d}  {2:6.3f} |  {3:4d}   {4:.3f} |{5:4d}   {6:.3f} | {7:s}".format(
             tid,
             testscore,
             testscore/tottestscores,
             weighted_traincounts[tid],
             weighted_traincounts[tid]/tottrainscores,
             template_counts[tid],
             template_counts[tid]/len(tids),
             Template.ALLTEMPLATES[int(tid)],
         )
         print(s)
Exemple #5
0
    def similar(self, word, num=20):
        """
        Distributional similarity: print the words that appear in the same
        contexts as the specified word, most similar first.

        :param word: The word used to seed the similarity search
        :type word: str
        :param num: The number of words to generate (default=20)
        :type num: int
        :seealso: ContextIndex.similar_words()
        """
        if '_word_context_index' not in self.__dict__:
            # Build the word-context index lazily on first use.
            self._word_context_index = ContextIndex(
                self.tokens,
                filter=lambda x: x.isalpha(),
                key=lambda s: s.lower())

        target = word.lower()
        wci = self._word_context_index._word_to_contexts
        if target not in wci.conditions():
            print("No matches")
            return

        # Rank other words by how many of the seed's contexts they share.
        shared_contexts = set(wci[target])
        fd = Counter(
            other
            for other in wci.conditions()
            for ctx in wci[other]
            if ctx in shared_contexts and other != target
        )
        print(tokenwrap([w for w, _ in fd.most_common(num)]))
Exemple #6
0
def similar(word, thesaurus, num=20):
    """
        Find up to *num* words related to *word* via a co-occurrence
        thesaurus.  Adapted from the nltk equivalent, but returns the
        result list (or ``None`` when the word has no recorded contexts)
        instead of printing to the console.
    """
    if '_word_context_index' not in thesaurus.__dict__:
        # Build the context index lazily the first time it is needed.
        thesaurus._word_context_index = ContextIndex(
            thesaurus.tokens,
            filter=lambda x: x.isalpha(),
            key=lambda s: s.lower())

    target = word.lower()
    wci = thesaurus._word_context_index._word_to_contexts
    if target not in wci.conditions():
        return None

    # Score candidates by the number of shared contexts with the seed word.
    shared = set(wci[target])
    scores = Counter(
        candidate
        for candidate in wci.conditions()
        for ctx in wci[candidate]
        if ctx in shared and candidate != target
    )
    return [w for w, _ in scores.most_common(num)]
Exemple #7
0
    def print_template_statistics(self, test_stats=None, printunused=True):
        """
        Print a list of all templates, ranked according to efficiency.

        If test_stats is available, the templates are ranked according to their
        relative contribution (summed for all rules created from a given template,
        weighted by score) to the performance on the test set. If no test_stats, then
        statistics collected during training are used instead. There is also
        an unweighted measure (just counting the rules). This is less informative,
        though, as many low-score rules will appear towards end of training.

        :param test_stats: dictionary of statistics collected during testing
        :type test_stats: dict of str -> any (but usually numbers)
        :param printunused: if True, print a list of all unused templates
        :type printunused: bool
        :return: None
        :rtype: None
        """
        # One template id per learned rule, in learning order.
        tids = [r.templateid for r in self._rules]
        train_stats = self.train_stats()

        trainscores = train_stats['rulescores']
        assert len(trainscores) == len(tids), "corrupt statistics: " \
            "{0} train scores for {1} rules".format(trainscores, tids)
        # Unweighted measure: number of rules each template produced.
        template_counts = Counter(tids)
        # Weighted measure: total training score contributed per template.
        weighted_traincounts = Counter()
        for (tid, score) in zip(tids, trainscores):
            weighted_traincounts[tid] += score
        tottrainscores = sum(trainscores)

        # det_tplsort() is for deterministic sorting;
        # the otherwise convenient Counter.most_common() unfortunately
        # does not break ties deterministically
        # between python versions and will break cross-version tests
        def det_tplsort(tpl_value):
            # Sort by score, breaking ties on the repr of the template id.
            return (tpl_value[1], repr(tpl_value[0]))

        def print_train_stats():
            # Ranked template table based on training statistics only.
            # NOTE(review): the header string below has an unbalanced
            # parenthesis ("(TRAIN)  {0} templates, {1} rules)"); kept
            # as-is because this update only adds comments.
            print("TEMPLATE STATISTICS (TRAIN)  {0} templates, {1} rules)".format(
                len(template_counts),
                len(tids))
            )
            print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
            head = "#ID | Score (train) |  #Rules     | Template"
            print(head, "\n", "-" * len(head), sep="")
            train_tplscores = sorted(weighted_traincounts.items(), key=det_tplsort, reverse=True)
            for (tid, trainscore) in train_tplscores:
                s = "{0} | {1:5d}   {2:5.3f} |{3:4d}   {4:.3f} | {5}".format(
                    tid,
                    trainscore,
                    trainscore/tottrainscores,
                    template_counts[tid],
                    template_counts[tid]/len(tids),
                    Template.ALLTEMPLATES[int(tid)],
                )
                print(s)

        def print_testtrain_stats():
            # Ranked template table based on test scores, with train columns.
            testscores = test_stats['rulescores']
            print("TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
                len(template_counts),
                len(tids)),
            )
            print("TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats))
            print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
            # Total test score contributed per template (weighted ranking key).
            weighted_testcounts = Counter()
            for (tid, score) in zip(tids, testscores):
                weighted_testcounts[tid] += score
            tottestscores = sum(testscores)
            head = "#ID | Score (test) | Score (train) |  #Rules     | Template"
            print(head, "\n", "-" * len(head), sep="")
            test_tplscores = sorted(weighted_testcounts.items(), key=det_tplsort, reverse=True)
            for (tid, testscore) in test_tplscores:
                s = "{0:s} |{1:5d}  {2:6.3f} |  {3:4d}   {4:.3f} |{5:4d}   {6:.3f} | {7:s}".format(
                    tid,
                    testscore,
                    testscore/tottestscores,
                    weighted_traincounts[tid],
                    weighted_traincounts[tid]/tottrainscores,
                    template_counts[tid],
                    template_counts[tid]/len(tids),
                    Template.ALLTEMPLATES[int(tid)],
                )
                print(s)

        def print_unused_templates():
            # Templates whose id never appears among the learned rules.
            usedtpls = set([int(tid) for tid in tids])
            unused = [(tid, tpl) for (tid, tpl) in enumerate(Template.ALLTEMPLATES) if tid not in usedtpls]
            print("UNUSED TEMPLATES ({0})".format(len(unused)))

            for (tid, tpl) in unused:
                print("{0:03d} {1:s}".format(tid, str(tpl)))

        # Weighted ranking source: test stats if provided, else train stats.
        if test_stats is None:
            print_train_stats()
        else:
            print_testtrain_stats()
        print()
        if printunused:
            print_unused_templates()
        print()
Exemple #8
0
def _modified_precision(candidate, references, n):
    """Calculate modified n-gram precision.

    Plain n-gram precision rewards a candidate that merely repeats a word
    found in some reference ("the the the ..." scores highly).  The
    modified version treats reference words as exhaustible: each candidate
    n-gram count is clipped at the largest number of times the n-gram
    occurs in any single reference.

    >>> _modified_precision(
    ...    'the the the the the the the'.split(),
    ...    ['the cat is on the mat'.split(), 'there is a cat on the mat'.split()],
    ...    n=1,
    ... )
    0.28...

    Returns 0 when the candidate yields no n-grams of order ``n``.
    """
    counts = Counter(ngrams(candidate, n))
    if not counts:
        return 0

    # Per candidate n-gram, the maximum count observed in any one reference.
    max_counts = {}
    for reference in references:
        reference_counts = Counter(ngrams(reference, n))
        for gram in counts:
            prev = max_counts.get(gram, 0)
            cur = reference_counts[gram]
            max_counts[gram] = cur if cur > prev else prev

    # Accumulate clipped and raw totals in a single pass.
    numerator = 0
    denominator = 0
    for gram, count in counts.items():
        numerator += min(count, max_counts[gram])
        denominator += count
    return numerator / denominator
Exemple #9
0
def _modified_precision(references, hypothesis, n):
    """
    Calculate modified ngram precision.

    Ordinary precision can be gamed by duplicating high-frequency reference
    words, e.g.

        >>> references = ['the cat is on the mat'.split(),
        ...               'there is a cat on the mat'.split()]
        >>> _modified_precision(references, 'the the the the the the the'.split(), n=1)
        0.2857142857142857

    Modified precision "exhausts" reference words: every hypothesis n-gram
    count is clipped at the largest count the n-gram has in any single
    reference before the precision ratio is formed.

    :param references: A list of reference translations.
    :type references: list(list(str))
    :param hypothesis: A hypothesis translation.
    :type hypothesis: list(str)
    :param n: The ngram order.
    :type n: int
    :return: clipped precision in [0, 1]; 0 if no n-grams of order ``n``.
    """
    hyp_counts = Counter(ngrams(hypothesis, n))
    if not hyp_counts:
        return 0

    # Clip ceiling: max count of each hypothesis n-gram over all references.
    max_counts = {}
    for reference in references:
        ref_counts = Counter(ngrams(reference, n))
        for gram in hyp_counts:
            max_counts[gram] = max(max_counts.get(gram, 0), ref_counts[gram])

    clipped_total = sum(min(count, max_counts[gram])
                        for gram, count in hyp_counts.items())
    return clipped_total / sum(hyp_counts.values())
Exemple #10
0
def compute_tf_idf(tokens, tf, idf):
    """Combine term and inverse-document frequencies into TF-IDF scores.

    :param tokens: tokens to score
    :param tf: term-frequency counts (indexable by token)
    :param idf: inverse-document-frequency values (indexable by token)
    :return: Counter mapping each token to tf[token] * idf[token]
    """
    return Counter({term: tf[term] * idf[term] for term in tokens})
Exemple #11
0
def compute_inverse_document_frequency(tf, tokens):
    """Inverse document frequency per token: log(1 / tf[token]).

    Tokens whose term frequency is not positive are skipped (they would
    otherwise divide by zero).

    :param tf: term-frequency counts (indexable by token)
    :param tokens: tokens to compute IDF values for
    :return: Counter of token -> log(1 / tf[token])
    """
    return Counter({tok: math.log(1 / tf[tok])
                    for tok in tokens if tf[tok] > 0})
Exemple #12
0
def compute_term_frequency(tokens):
    """Return a Counter mapping each token to its number of occurrences."""
    return Counter(tokens)