def _compute_bleu(self, cur_valid_out, valid_trees):
    """Compute the BLEU score of the current output on a set of validation trees.
    If a validation instance is a tuple (two paraphrases), both are used as
    references for BLEU computation.

    @param cur_valid_out: the current system output on the validation DAs
    @param valid_trees: the gold trees for the validation DAs (one or two paraphrases)
    @return: BLEU score, as a float in [0, 1]
    """
    evaluator = BLEUMeasure()
    for pred_tree, gold_trees in zip(cur_valid_out, valid_trees):
        evaluator.append(pred_tree, gold_trees)
    return evaluator.bleu()
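
# Reference sketch (illustrative only, NOT the BLEUMeasure implementation):
# what a corpus-level, multi-reference BLEU like the one above computes,
# simplified to uniform n-gram weights and no smoothing. Standalone and
# hypothetical; `predictions` is a list of token lists, `references` a list
# of lists of token lists (one or more paraphrases per instance).
import math
from collections import Counter

def corpus_bleu_sketch(predictions, references, max_ngram=4):
    matched = [0] * max_ngram
    total = [0] * max_ngram
    pred_len, ref_len = 0, 0
    for pred, refs in zip(predictions, references):
        pred_len += len(pred)
        # closest reference length (ties broken toward shorter), as in standard BLEU
        ref_len += min((abs(len(r) - len(pred)), len(r)) for r in refs)[1]
        for n in range(1, max_ngram + 1):
            pred_ngrams = Counter(tuple(pred[i:i + n]) for i in range(len(pred) - n + 1))
            max_ref = Counter()
            for ref in refs:
                max_ref |= Counter(tuple(ref[i:i + n])
                                   for i in range(len(ref) - n + 1))  # per-ngram max over refs
            matched[n - 1] += sum((pred_ngrams & max_ref).values())  # clipped matches
            total[n - 1] += sum(pred_ngrams.values())
    if not all(matched):  # no smoothing: any empty n-gram order zeroes the score
        return 0.0
    log_prec = sum(math.log(m / float(t)) for m, t in zip(matched, total)) / max_ngram
    bp = 1.0 if pred_len > ref_len else math.exp(1 - ref_len / float(pred_len))
    return bp * math.exp(log_prec)  # float in [0, 1]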

def _rerank_paths(self, paths, da):
    """Rerank the n-best decoded paths according to the reranking classifier
    and/or BLEU against the context."""
    # convert decoder input token IDs back into trees
    trees = [self.tree_embs.ids_to_tree(np.array(path.dec_inputs).transpose()[0])
             for path in paths]

    # rerank using BLEU against context if set to do so
    if self.context_bleu_weight:
        bm = BLEUMeasure(max_ngram=2)
        bleus = []
        for path, tree in zip(paths, trees):
            bm.reset()
            bm.append([(n.t_lemma, None) for n in tree.nodes[1:]], [da[0]])
            bleu = (bm.ngram_precision()
                    if self.context_bleu_metric == 'ngram_prec'
                    else bm.bleu())
            bleus.append(bleu)
            path.logprob += self.context_bleu_weight * bleu

        log_debug(("BLEU for context: %s\n\n" % " ".join([form for form, _ in da[0]])) +
                  "\n".join([("%.5f\t" % b) + " ".join([n.t_lemma for n in t.nodes[1:]])
                             for b, t in zip(bleus, trees)]))

    # subtract distances from logprob so that non-fitting trees are heavily penalized
    if self.classif_filter:
        self.classif_filter.init_run(da)
        fits = self.classif_filter.dist_to_cur_da(trees)
        for path, fit in zip(paths, fits):
            path.logprob -= self.misfit_penalty * fit

        log_debug(("Misfits for DA: %s\n\n" % str(da)) +
                  "\n".join([("%.5f\t" % fit) +
                             " ".join([unicode(n.t_lemma) for n in tree.nodes[1:]])
                             for fit, tree in zip(fits, trees)]))

    # normalize path scores for length (if set to do so)
    if self.length_norm_weight:
        for path in paths:
            path.logprob /= len(path) ** self.length_norm_weight

    return sorted(paths, key=lambda p: p.logprob, reverse=True)
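
# Score-combination sketch (hypothetical helper, not part of the class):
# the reranking above adjusts each path's log-probability additively for
# context BLEU, subtractively for classifier misfit, and divides by
# length ** weight, so weight 0 keeps raw scores and weight 1 ranks by
# average per-token log-probability. All parameter names here are
# illustrative, mirroring the attributes used in _rerank_paths().
def rerank_score_sketch(logprob, length, context_bleu=0.0, context_bleu_weight=0.0,
                        misfit=0.0, misfit_penalty=0.0, length_norm_weight=0.0):
    score = logprob
    score += context_bleu_weight * context_bleu  # reward similarity to the context
    score -= misfit_penalty * misfit             # penalize semantic misfits
    if length_norm_weight:
        score /= length ** length_norm_weight    # length normalization
    return score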

def eval_tokens(das, eval_tokens, gen_tokens):
    """Evaluate generated tokens and print out statistics."""
    postprocess_tokens(eval_tokens, das)
    postprocess_tokens(gen_tokens, das)

    evaluator = BLEUMeasure()
    for pred_sent, gold_sents in zip(gen_tokens, eval_tokens):
        evaluator.append(pred_sent, gold_sents)
    log_info("BLEU score: %.4f" % (evaluator.bleu() * 100))

    evaluator = Evaluator()
    for pred_sent, gold_sents in zip(gen_tokens, eval_tokens):
        for gold_sent in gold_sents:  # effectively an average over all gold paraphrases
            evaluator.append(gold_sent, pred_sent)

    log_info("TOKEN precision: %.4f, Recall: %.4f, F1: %.4f"
             % evaluator.p_r_f1(EvalTypes.TOKEN))
    log_info("Sentence length stats:\n * GOLD %s\n * PRED %s\n * DIFF %s"
             % evaluator.size_stats())
    log_info("Common subphrase stats:\n -- SIZE: %s\n -- ΔGLD: %s\n -- ΔPRD: %s"
             % evaluator.common_substruct_stats())
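
# Illustrative sketch (standalone, NOT the Evaluator API): token-level
# precision/recall/F1 of one prediction against one gold sentence, matching
# tokens as multisets. Averaging this over every gold paraphrase mirrors the
# nested loop in eval_tokens() above.
from collections import Counter

def token_prf_sketch(gold_tokens, pred_tokens):
    overlap = sum((Counter(gold_tokens) & Counter(pred_tokens)).values())
    precision = overlap / float(len(pred_tokens)) if pred_tokens else 0.0
    recall = overlap / float(len(gold_tokens)) if gold_tokens else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if precision + recall else 0.0
    return precision, recall, f1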