Example #1
    def organize_words(self):
        """
         After run all the generation, we organize the sentence finding the first word in the original sentence
         and through blue evaluation finding the words after it.
        :return:
        """
        # -------- Finds the first word -------- #
        word_fit = []
        for word in self.found_sentence:
            fitness = bleu(hypothesis=word,
                           references=self.target_sentence.split(' ')[:1],
                           auto_reweigh=True)
            word_fit.append((word, fitness))

        word_fit.sort(key=lambda tup: tup[1], reverse=True)

        first_word = word_fit[0][0]
        # -------------------------------------------

        self.found_sentence.remove(first_word)

        # ----- search for the next words -------------#
        final_sentence = [first_word]
        bi_gram = [first_word]
        fitness = []
        i = 1

        while len(self.target_sentence.split(' ')) > i:
            i += 1
            for word in self.found_sentence:
                if sys.intern(word) is not sys.intern(bi_gram[0]):
                    bi_gram.append(word)
                    bi_fitness = bleu(
                        hypothesis=bi_gram,
                        references=[self.target_sentence.split(' ')],
                        auto_reweigh=True)
                    bi_gram = bi_gram[:-1]
                    fitness.append((word, bi_fitness))

            fitness.sort(key=lambda tup: tup[-1], reverse=True)
            final_sentence.append(fitness[0][0])
            bi_gram = [fitness[0][0]]
            fitness = []

        last_fitness = bleu(hypothesis=final_sentence,
                            references=[self.target_sentence.split(' ')],
                            auto_reweigh=True)

        print("Last generation: {}\nLast Fitness {}".format(
            final_sentence, last_fitness))
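The calls above match NLTK's sentence-level BLEU (sentence_bleu, importable as bleu), which expects a tokenized hypothesis plus a list of tokenized references, and which can re-normalize the n-gram weights for short hypotheses via auto_reweigh. A minimal sketch of that call pattern, with made-up placeholder sentences:

from nltk.translate.bleu_score import sentence_bleu as bleu

target = "the cat sat on the mat"     # hypothetical target sentence
candidate = ["the", "cat"]            # a short, partial hypothesis

# references is a list of token lists; auto_reweigh shortens the n-gram
# weights so a 2-token hypothesis is not penalized for missing 3/4-grams.
score = bleu(hypothesis=candidate,
             references=[target.split(' ')],
             auto_reweigh=True)
print(score)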
Example #2
 def test_simple(self):
     sentence = """我最爱吃的东西是凤梨"""
     tokens = jieba.lcut(sentence)
     self.logger.debug(','.join(tokens))
     jieba.add_word('爱吃')
     references_1 = jieba.lcut('我爱吃的东西是凤梨啊')
     self.logger.debug('references 1: %s', references_1)
     references_2 = jieba.lcut('他不爱吃苹果')
     self.logger.debug('references 2: %s', references_2)
     references_3 = jieba.lcut('我们都是中国人地地道道')
     self.logger.debug('references 3: %s', references_3)
     score = nltk.bleu([references_1], tokens)
     self.logger.debug('bleu score is %s', score)
     pass
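With a single reference and short token lists, higher-order n-gram matches can easily be zero, in which case the unsmoothed call above collapses toward zero and NLTK prints a warning. A smoothing function, as the later examples use, keeps the score informative. A minimal sketch with placeholder English tokens instead of the jieba output:

import nltk
from nltk.translate.bleu_score import SmoothingFunction

reference = ["I", "love", "eating", "pineapple"]   # hypothetical tokens
hypothesis = ["I", "love", "pineapple"]

chencherry = SmoothingFunction()
# method1 adds a small epsilon to zero n-gram counts instead of zeroing the score
score = nltk.bleu([reference], hypothesis, smoothing_function=chencherry.method1)
print(score)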
Example #3
 def py_compute_sentence_bleu(self, prediction, reference, debug=False):
     scores = []
     for pred_i, ref_i in zip(prediction, reference):
         pred_i, ref_i = map(self.crop_eos, [pred_i, ref_i])
         if len(pred_i) > 0 and len(ref_i) > 0:
             score_i = nltk.bleu([ref_i],
                                 pred_i,
                                 smoothing_function=self.smoothing_function)
         else:
             score_i = 0
         scores.append(score_i)
         if debug:
             print('pred and ref:', pred_i, ref_i)
             print('score:', score_i)
     return np.array(scores, dtype=np.float32)
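Example #3 crops each sequence at its end-of-sentence marker before scoring, so that anything after EOS does not affect BLEU. A standalone sketch of such a helper, assuming string tokens and an "<eos>" marker name (both assumptions, not taken from the snippet above):

def crop_eos(tokens, eos="<eos>"):
    # Keep only the tokens that appear before the first EOS marker, if any.
    return tokens[:tokens.index(eos)] if eos in tokens else tokens

print(crop_eos(["a", "cat", "<eos>", "<pad>", "<pad>"]))  # ['a', 'cat']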
Example #4
def avg_bleu_score(sen, summaries, avg=False):
    min_length = 5
    if avg:
        from nltk.translate.bleu_score import SmoothingFunction
        chencherry = SmoothingFunction()
        total = 0
        for summ in summaries:
            total += bleu([summ], sen, smoothing_function=chencherry.method2)
        score = total / len(summaries)
    else:
        #        score = bleu(summaries, sen, smoothing_function=chencherry.method2)
        score = nltk.translate.bleu_score.modified_precision(summaries, sen, 2)
        if len(sen) < min_length:
            import numpy as np
            score *= np.exp(1 - (min_length / len(sen)))
    return score
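Note that modified_precision scores a single n-gram order and returns a fractions.Fraction rather than a complete BLEU score (there is no brevity penalty), which is why the branch above applies its own length-based penalty for short sentences. A minimal sketch with placeholder token lists:

from nltk.translate.bleu_score import modified_precision

references = [["the", "cat", "sat", "on", "the", "mat"]]   # hypothetical reference
hypothesis = ["the", "cat", "the", "cat"]

p2 = modified_precision(references, hypothesis, n=2)
print(p2, float(p2))   # a Fraction: clipped bigram matches / total bigrams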
Example #5
    def run_evaluations(self):
        """
            Run the evaluations of each word in the vocabulary to add it or not to the
            next generation of 'parents' word
        :return:
        """

        bi_gram = []
        i = 0

        self.initialize_population()
        print("target sentence: ", self.target_sentence.split(' '))
        print("Vocabulary: ", self.tar_vocabulary)

        parents = list(itertools.chain.from_iterable(self.generation))
        print("Parents: ", parents)

        while len(self.target_sentence.split(' ')) > i:

            evaluation_array = []
            for word in self.tar_vocabulary:

                if sys.intern(word) is not sys.intern(parents[-1]):
                    parents.append(word)
                    bi_gram.append(word)
                    # Hidden Markov states
                    fitness = bleu(
                        hypothesis=parents,
                        references=[self.target_sentence.split(' ')],
                        auto_reweigh=True)

                    evaluation_array.append((word, fitness))
                    parents = parents[:-1]
                bi_gram = []
            evaluation_array.sort(key=lambda tup: tup[-1], reverse=True)
            print(evaluation_array)
            i += 1
            new_word = evaluation_array[0][0]
            if new_word not in parents:
                parents.append(new_word)
            else:
                self.tar_vocabulary.remove(new_word)

        self.found_sentence = parents

        self.organize_words()
Example #6
def bleu(model,
         prior,
         prefix_length,
         tokenize,
         **kwargs):
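    # Hold out the tail of `prior` as the reference, let the model continue the
    # prefix for the same length, and score the continuation with sentence BLEU.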
    prefix, reference = prior[:prefix_length], prior[prefix_length:]
    model.reset_generator()
    hypothesis = model.generate(prefix=prefix,
                                length=len(reference),
                                verbose=0,
                                **kwargs)

    reference = tokenize(reference)
    hypothesis = tokenize(hypothesis)

    score = nltk.bleu([reference], hypothesis)
#   score = nltk.bleu_score.modified_precision([reference], hypothesis, n=4)
    return score
Example #7
def score(hyp, refs):
    return bleu(refs,
                hyp,
                weights=weights,
                smoothing_function=SmoothingFunction().method1)


with open("../CleanedEnglish1000.txt", "r") as fp1:
    with open("../CleanedFrench1000.txt", "r") as fp2:

        for num in range(0, 5):
            frSent = fp2.readline()
            enSent = fp1.readline()

            frSent = re.sub("[\n]", "", frSent)
            enSent = re.sub("[\n]", "", enSent)
            print ("=====> Read french Sentence")
            print ("=====> Translating to english")
            out = translate(frSent, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam)
            print ("=====> Finished translation")
            print ("==================================")
            print "French Sentence: ", frSent
            print ("----------------------------------")
            print "English Sentence: ", enSent
            print ("----------------------------------")
            print "Translated Sentence: ", out["translation"]
            print ("----------------------------------")

            ## Computing bleu score
            print "BLEU score of translation", nltk.bleu(
                out["translation"], [enSent, enSent, enSent], [0.25, 0.25, 0.25, 0.25]
            )
            print ("==================================")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_dir",
                        type=str,
                        required=True,
                        help="The directory of the outputs")
    args = parser.parse_args()

    print("\t".join(["Setup", "LM", "BLEU", "ROUGE"]))

    for setup in [
            "rationale", "multi", "update_rationale", "update_type_rationale"
    ]:
        for lm in ["bart-large", "gpt2-xl"]:

            # Compute BLEU and ROUGE from the text predictions
            data = [
                json.loads(line.strip()) for line in open(
                    f"{args.out_dir}/{setup}_{lm}/test_{setup}_{lm}.jsonl")
            ]
            gold = defaultdict(list)
            predictions = defaultdict(set)

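            # Clean each example: keep the rationale span of the gold output (or
            # strip its special tags) and keep only non-trivial lowercased predictions.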
            for ex in data:
                curr_gold = ex["gold"].lower().replace("<eos>", "").strip()
                curr_gold = curr_gold.split(
                    " [rationale] "
                )[1] if " [rationale] " in curr_gold else curr_gold.replace(
                    " [update]", "").replace(" [update_type]", "").replace(
                        " [rationale]", "")
                curr_preds = [
                    pred.lower().strip() for pred in ex["predictions"]
                ]
                curr_preds = set(
                    [pred for pred in curr_preds if len(pred) > 1])

                if len(curr_gold) > 0 and len(curr_preds) > 0:
                    gold[ex["input"]].append(curr_gold)
                    predictions[ex["input"]] = predictions[ex["input"]].union(
                        curr_preds)

            bleu_scores, rouge_scores = [], []

            for input, curr_gold in gold.items():
                curr_predictions = list(predictions[input])

                # The hypotheses and references must have the same length
                length = min(len(curr_gold), len(curr_predictions))

                if length > 0:
                    hyps = curr_predictions[:length]
                    refs = curr_gold[:length]
                    rouge_scores.extend([
                        score["rouge-l"]["f"]
                        for score in rouge.get_scores(hyps, refs)
                    ])

                    hyps = [tuple(h.split()) for h in hyps]
                    refs = [tuple(r.split()) for r in refs]
                    bleu_scores.extend([
                        bleu(refs,
                             pred,
                             weights=weights,
                             smoothing_function=smoothing) for pred in hyps
                    ])

            print("\t".join([
                setup, lm, f"{100.0 * np.mean(bleu_scores):.3f}",
                f"{100.0 * np.mean(rouge_scores):.3f}"
            ]))
Example #10

hyps = []
refs = []
stories = []
dim_rels = []
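# Pair each generated relation (hypothesis) with its gold relation (reference),
# matched by story, sentence id, and relation dimension.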
for l in original_data:
    stories.append(l['story'])
    d_ = [entry for entry in data if entry['story'] == l['story']]
    if len(d_) == 0:
       continue 
    d_ = d_[0]
    dim = reverse_template(l['prefix'])
    dim_rels.append(dim)
    gold_rel = add_template(l['rel'],dim)
    gen_rel = d_['<|sent' + str(l['sentID']) + '|>_generated_relations'][dims.index(dim)]
    gen_rel = [add_template(g, dim) for g in gen_rel]
    hyps.extend(gen_rel)
    refs.extend([gold_rel] * len(gen_rel))

print('num unique stories: ' + str(len(set(stories))))
hyps = [tuple(h.split()) for h in hyps]
refs = [tuple(r.split()) for r in refs]
smoothing = SmoothingFunction().method1
weights = [0.5] * 2

bleu_scores1 = [bleu(refs, pred, weights=[1.0], smoothing_function=smoothing) for pred in hyps]
print(f"bleu1={100.0 * np.mean(bleu_scores1):.3f}")
bleu_scores2 = [bleu(refs, pred, weights=weights, smoothing_function=smoothing) for pred in hyps]
print(f"bleu2={100.0 * np.mean(bleu_scores2):.3f}")
Example #11
 def _scoring_f(self, hyp, refs):
     return bleu(refs,hyp,weights=self.weights)
    
with open('../CleanedEnglish1000.txt', 'r') as fp1:
    with open('../CleanedFrench1000.txt', 'r') as fp2:

        for num in range(0,5):
            frSent = fp2.readline()
            enSent = fp1.readline()

            frSent = re.sub('[\n]', '', frSent)
            enSent = re.sub('[\n]', '', enSent)
            print('=====> Read french Sentence')
            print('=====> Translating to english')
            out = translate(frSent, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam)
            print('=====> Finished translation')
            print('==================================')
            print('French Sentence: ', frSent)
            print('----------------------------------')
            print('English Sentence: ', enSent)
            print('----------------------------------')
            print('Translated Sentence: ', out['translation'])
            print('----------------------------------')

            ## Computing bleu score (references first, then the hypothesis, both tokenized)
            print('BLEU score of translation', nltk.bleu([enSent.split()], out['translation'].split(), [1]))
            print('==================================')