def organize_words(self):
    """Order the words collected by the generations into a sentence.

    The opening word is the entry of ``self.found_sentence`` with the highest
    BLEU score against the first token of ``self.target_sentence``.  Every
    following position is filled greedily: each remaining candidate is paired
    with the previously chosen word, the pair is scored with BLEU against the
    whole target sentence, and the best candidate is appended.  The resulting
    sentence and its BLEU score are printed.

    Side effect: the chosen first word is removed from
    ``self.found_sentence``.

    :raises IndexError: if ``self.found_sentence`` is empty.
    :return: None
    """
    # Split once; the target does not change while the sentence is organized.
    target_tokens = self.target_sentence.split(' ')

    # -------- Find the first word -------- #
    # NOTE(review): the word string itself is passed as the hypothesis and the
    # first target token as the reference, as in the original code.
    word_fit = [
        (
            word,
            bleu(hypothesis=word,
                 references=target_tokens[:1],
                 auto_reweigh=True),
        )
        for word in self.found_sentence
    ]
    word_fit.sort(key=lambda tup: tup[1], reverse=True)
    first_word = word_fit[0][0]
    self.found_sentence.remove(first_word)

    # -------- Search for the following words -------- #
    final_sentence = [first_word]
    previous_word = first_word
    # One word is already placed, so fill the remaining len(target) - 1 slots.
    for _ in range(len(target_tokens) - 1):
        candidates = [
            (
                word,
                bleu(hypothesis=[previous_word, word],
                     references=[target_tokens],
                     auto_reweigh=True),
            )
            for word in self.found_sentence
            # Plain equality replaces the interned-identity comparison.
            if word != previous_word
        ]
        if not candidates:
            # Nothing left to place: report the partial sentence instead of
            # failing with IndexError.
            break
        # max() returns the first best-scoring candidate, which is what a
        # stable descending sort followed by [0] selected.
        best_word = max(candidates, key=lambda tup: tup[-1])[0]
        final_sentence.append(best_word)
        previous_word = best_word

    last_fitness = bleu(hypothesis=final_sentence,
                        references=[target_tokens],
                        auto_reweigh=True)
    print("Last generation: {}\nLast Fitness {}".format(
        final_sentence, last_fitness))
def test_simple(self):
    """Smoke-test sentence-level BLEU on jieba-segmented Chinese text.

    Cuts a hypothesis sentence, registers an extra dictionary word, cuts three
    reference sentences, and logs the BLEU score of the hypothesis against the
    first reference.  The other two references are only logged.
    """
    sentence = """我最爱吃的东西是凤梨"""
    hypothesis_tokens = jieba.lcut(sentence)
    self.logger.debug(','.join(hypothesis_tokens))

    # The custom word is added after the hypothesis has already been cut;
    # this ordering is kept as in the original.
    jieba.add_word('爱吃')

    first_reference = jieba.lcut('我爱吃的东西是凤梨啊')
    self.logger.debug('references 1: %s', first_reference)

    second_reference = jieba.lcut('他不爱吃苹果')
    self.logger.debug('references 2: %s', second_reference)

    third_reference = jieba.lcut('我们都是中国人地地道道')
    self.logger.debug('references 3: %s', third_reference)

    bleu_value = nltk.bleu([first_reference], hypothesis_tokens)
    self.logger.debug('bleu score is %s', bleu_value)
def py_compute_sentence_bleu(self, prediction, reference, debug=False):
    """Score each prediction against its reference with sentence-level BLEU.

    ``prediction`` and ``reference`` are iterated in lockstep.  Both members
    of a pair are passed through ``self.crop_eos`` first; a pair in which
    either side is empty afterwards scores 0 without calling BLEU.

    :param prediction: iterable of predicted sequences.
    :param reference: iterable of reference sequences.
    :param debug: when true, print every cropped pair and its score.
    :return: ``numpy`` float32 array holding one score per pair.
    """
    sentence_scores = []
    for raw_pred, raw_ref in zip(prediction, reference):
        hyp_tokens = self.crop_eos(raw_pred)
        ref_tokens = self.crop_eos(raw_ref)

        if len(hyp_tokens) == 0 or len(ref_tokens) == 0:
            pair_score = 0
        else:
            pair_score = nltk.bleu(
                [ref_tokens],
                hyp_tokens,
                smoothing_function=self.smoothing_function,
            )
        sentence_scores.append(pair_score)

        if debug:
            print('pred and ref:', hyp_tokens, ref_tokens)
            print('score:', pair_score)

    return np.array(sentence_scores, dtype=np.float32)
def avg_bleu_score(sen, summaries, avg=False):
    """Score a tokenized sentence against a set of tokenized summaries.

    With ``avg=True`` the result is the mean smoothed sentence BLEU of ``sen``
    against each summary taken individually.  Otherwise it is the bigram
    modified precision of ``sen`` against all summaries at once.  Sentences
    shorter than five tokens are additionally scaled by
    ``exp(1 - 5 / len(sen))``.

    :param sen: hypothesis tokens.
    :param summaries: list of reference token lists.
    :param avg: average per-summary BLEU instead of using modified precision.
    :return: the score; ``0.0`` when ``sen`` is empty, or when ``avg`` is set
        and there are no summaries (both cases used to raise
        ``ZeroDivisionError``).
    """
    min_length = 5

    # An empty hypothesis cannot match anything, and an average over zero
    # summaries is undefined; report 0.0 instead of dividing by zero below.
    if len(sen) == 0 or (avg and len(summaries) == 0):
        return 0.0

    if avg:
        from nltk.translate.bleu_score import SmoothingFunction

        chencherry = SmoothingFunction()
        total = sum(
            bleu([summ], sen, smoothing_function=chencherry.method2)
            for summ in summaries
        )
        score = total / len(summaries)
    else:
        score = nltk.translate.bleu_score.modified_precision(summaries, sen, 2)

    if len(sen) < min_length:
        import numpy as np

        # Penalize very short sentences.
        score *= np.exp(1 - (min_length / len(sen)))
    return score
def run_evaluations(self):
    """Grow the parent word list one generation at a time.

    After ``self.initialize_population()``, the flattened ``self.generation``
    becomes the initial parent list.  For as many rounds as the target
    sentence has tokens, every vocabulary word that differs from the last
    parent is appended to the parents as a trial hypothesis and scored with
    BLEU against the target.  The best-scoring word joins the parents if it
    is not already present; otherwise it is removed from
    ``self.tar_vocabulary``.  Finally the parents are stored in
    ``self.found_sentence`` and ``self.organize_words()`` is called.

    :raises IndexError: if the vocabulary is non-empty while the initial
        parent list is empty.
    :return: None
    """
    self.initialize_population()

    # Split once; the target is not modified inside this method.
    target_tokens = self.target_sentence.split(' ')
    print("target sentence: ", target_tokens)
    print("Vocabulary: ", self.tar_vocabulary)
    parents = list(itertools.chain.from_iterable(self.generation))
    print("Parents: ", parents)

    for _ in range(len(target_tokens)):
        # Build each trial hypothesis as a new list instead of appending to
        # and re-slicing ``parents``; plain ``!=`` replaces the
        # interned-identity comparison.
        evaluation_array = [
            (
                word,
                bleu(hypothesis=parents + [word],
                     references=[target_tokens],
                     auto_reweigh=True),
            )
            for word in self.tar_vocabulary
            if word != parents[-1]
        ]
        evaluation_array.sort(key=lambda tup: tup[-1], reverse=True)
        print(evaluation_array)

        if not evaluation_array:
            # No candidate could be scored: stop evolving instead of failing
            # with IndexError, and organize what was found so far.
            break

        new_word = evaluation_array[0][0]
        if new_word not in parents:
            parents.append(new_word)
        else:
            self.tar_vocabulary.remove(new_word)

    self.found_sentence = parents
    self.organize_words()
def bleu(model, prior, prefix_length, tokenize, **kwargs):
    """Return the BLEU score of a model continuation against the true one.

    ``prior`` is split at ``prefix_length``: the head is given to the model as
    a prefix and the tail serves as the reference.  The model's generator is
    reset, a continuation as long as the reference is generated, and both are
    tokenized before scoring.

    :param model: object exposing ``reset_generator()`` and ``generate(...)``.
    :param prior: sliceable source sequence.
    :param prefix_length: number of leading items of ``prior`` used as prefix.
    :param tokenize: callable applied to the reference and to the hypothesis.
    :param kwargs: forwarded unchanged to ``model.generate``.
    :return: the value returned by ``nltk.bleu``.
    """
    prefix = prior[:prefix_length]
    continuation = prior[prefix_length:]

    model.reset_generator()
    generated = model.generate(
        prefix=prefix,
        length=len(continuation),
        verbose=0,
        **kwargs
    )

    reference_tokens = tokenize(continuation)
    hypothesis_tokens = tokenize(generated)
    return nltk.bleu([reference_tokens], hypothesis_tokens)
def score(hyp, refs):
    """Return the smoothed BLEU score of ``hyp`` against ``refs``.

    Uses the module-level ``weights`` and a fresh ``SmoothingFunction``'s
    ``method1`` smoothing.
    """
    smoother = SmoothingFunction().method1
    return bleu(refs, hyp, weights=weights, smoothing_function=smoother)
# print(maxprob) return {"translation": translated_sent, "probability": maxprob} with open("../CleanedEnglish1000.txt", "r") as fp1: with open("../CleanedFrench1000.txt", "r") as fp2: for num in range(0, 5): frSent = fp2.readline() enSent = fp1.readline() frSent = re.sub("[\n]", "", frSent) enSent = re.sub("[\n]", "", enSent) print ("=====> Read french Sentence") print ("=====> Translating to english") out = translate(frSent, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam) print ("=====> Finished translation") print ("==================================") print "French Sentence: ", frSent print ("----------------------------------") print "English Sentence: ", enSent print ("----------------------------------") print "Translated Sentence: ", out["translation"] print ("----------------------------------") ## Computing bleu score print "BLEU score of translation", nltk.bleu( out["translation"], [enSent, enSent, enSent], [0.25, 0.25, 0.25, 0.25] ) print ("==================================")
def main():
    """Print a BLEU / ROUGE-L table for every setup and language model.

    For each ``(setup, lm)`` pair the predictions file
    ``{out_dir}/{setup}_{lm}/test_{setup}_{lm}.jsonl`` is read.  Gold texts
    and predictions are lower-cased, cleaned and grouped by example input.
    Within each group an equal number of predictions and gold texts is kept,
    ROUGE-L F is computed pairwise, and each kept prediction is scored with
    BLEU against the kept gold texts.  Mean scores are printed as percentages,
    tab-separated.

    Command-line arguments:
        --out_dir: directory holding the per-setup output folders (required).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_dir",
                        type=str,
                        required=True,
                        help="The directory of the outputs")
    args = parser.parse_args()

    print("\t".join(["Setup", "LM", "BLEU", "ROUGE"]))

    for setup in [
            "rationale", "multi", "update_rationale", "update_type_rationale"
    ]:
        for lm in ["bart-large", "gpt2-xl"]:
            # Compute BLEU and ROUGE from the text predictions.
            predictions_path = (
                f"{args.out_dir}/{setup}_{lm}/test_{setup}_{lm}.jsonl")
            # Context manager so the file handle is closed after reading.
            with open(predictions_path) as predictions_file:
                data = [json.loads(line.strip()) for line in predictions_file]

            gold = defaultdict(list)
            predictions = defaultdict(set)

            for ex in data:
                curr_gold = ex["gold"].lower().replace("<eos>", "").strip()
                if " [rationale] " in curr_gold:
                    # Keep only the text that follows the rationale marker.
                    curr_gold = curr_gold.split(" [rationale] ")[1]
                else:
                    curr_gold = (curr_gold
                                 .replace(" [update]", "")
                                 .replace(" [update_type]", "")
                                 .replace(" [rationale]", ""))

                normalized = (pred.lower().strip()
                              for pred in ex["predictions"])
                curr_preds = {pred for pred in normalized if len(pred) > 1}

                if len(curr_gold) > 0 and len(curr_preds) > 0:
                    gold[ex["input"]].append(curr_gold)
                    predictions[ex["input"]].update(curr_preds)

            bleu_scores, rouge_scores = [], []

            for source_text, curr_gold in gold.items():
                # Sets iterate in arbitrary order; sort so the truncation
                # below keeps the same predictions on every run.
                curr_predictions = sorted(predictions[source_text])

                # The refs and gold must be in the same size
                length = min(len(curr_gold), len(curr_predictions))
                if length > 0:
                    hyps = curr_predictions[:length]
                    refs = curr_gold[:length]
                    rouge_scores.extend([
                        score["rouge-l"]["f"]
                        for score in rouge.get_scores(hyps, refs)
                    ])

                    hyps = [tuple(h.split()) for h in hyps]
                    refs = [tuple(r.split()) for r in refs]
                    bleu_scores.extend([
                        bleu(refs,
                             pred,
                             weights=weights,
                             smoothing_function=smoothing) for pred in hyps
                    ])

            print("\t".join([
                setup, lm, f"{100.0 * np.mean(bleu_scores):.3f}",
                f"{100.0 * np.mean(rouge_scores):.3f}"
            ]))
# Collect (hypothesis, reference) pairs for every example of the original
# data that has a generated counterpart, then report mean BLEU-1 and BLEU-2.
hyps = []
refs = []
stories = []
dim_rels = []

# Index the generated entries by story once (first entry per story wins, as
# with the former per-item linear scan) so each lookup is O(1).
first_entry_by_story = {}
for entry in data:
    first_entry_by_story.setdefault(entry['story'], entry)

for item in original_data:
    stories.append(item['story'])
    generated_entry = first_entry_by_story.get(item['story'])
    if generated_entry is None:
        continue
    dim = reverse_template(item['prefix'])
    dim_rels.append(dim)
    gold_rel = add_template(item['rel'], dim)
    sentence_key = '<|sent' + str(item['sentID']) + '|>_generated_relations'
    gen_rel = generated_entry[sentence_key][dims.index(dim)]
    gen_rel = [add_template(g, dim) for g in gen_rel]
    hyps.extend(gen_rel)
    # One copy of the gold relation per generation keeps hyps/refs aligned.
    refs.extend([gold_rel] * len(gen_rel))

print('num unique stories: ' + str(len(set(stories))))

hyps = [tuple(h.split()) for h in hyps]
refs = [tuple(r.split()) for r in refs]

smoothing = SmoothingFunction().method1
weights = [0.5] * 2

# Score every hypothesis against its own aligned reference only.  Passing the
# whole ``refs`` list as the reference set would credit a generation for
# matching the gold relation of any other example.
bleu_scores1 = [
    bleu([ref], pred, weights=[1.0], smoothing_function=smoothing)
    for ref, pred in zip(refs, hyps)
]
print(f"bleu1={100.0 * np.mean(bleu_scores1):.3f}")

bleu_scores2 = [
    bleu([ref], pred, weights=weights, smoothing_function=smoothing)
    for ref, pred in zip(refs, hyps)
]
print(f"bleu2={100.0 * np.mean(bleu_scores2):.3f}")
def _scoring_f(self, hyp, refs):
    """Return the BLEU score of ``hyp`` against ``refs`` using ``self.weights``."""
    ngram_weights = self.weights
    return bleu(refs, hyp, weights=ngram_weights)
maxprob = tmp_prob translated_sent = tmp_sent #print('** Max prob :') #print(maxprob) return {'translation':translated_sent, 'probability':maxprob} with open('../CleanedEnglish1000.txt', 'r') as fp1: with open('../CleanedFrench1000.txt', 'r') as fp2: for num in range(0,5): frSent = fp2.readline() enSent = fp1.readline() frSent = re.sub('[\n]', '', frSent) enSent = re.sub('[\n]', '', enSent) print('=====> Read french Sentence') print('=====> Translating to english') out = translate(frSent, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam) print('=====> Finished translation') print('==================================') print 'French Sentence: ', frSent print('----------------------------------') print 'English Sentence: ', enSent print('----------------------------------') print 'Translated Sentence: ', out['translation'] print('----------------------------------') ## Computing bleu score print 'BLEU score of translation', nltk.bleu(out['translation'],enSent,[1]) print('==================================')
translated_sent = tmp_sent #print('** Max prob :') #print(maxprob) ''' and 0 return {'translation':translatedSentence, 'probability':maxprob} with open('../CleanedEnglish100.txt', 'r') as fp1: with open('../CleanedFrench100.txt', 'r') as fp2: for num in range(0,5): frSent = fp2.readline() enSent = fp1.readline() frSent = re.sub('[\n]', '', frSent) enSent = re.sub('[\n]', '', enSent) print('=====> Read french Sentence') print('=====> Translating to english') out = translate(frSent, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam) print('=====> Finished translation') print('==================================') print 'French Sentence: ', frSent print('----------------------------------') print 'English Sentence: ', enSent print('----------------------------------') print 'Translated Sentence: ', out['translation'] print('----------------------------------') ## Computing bleu score print 'BLEU score of translation', nltk.bleu(out['translation'],[enSent, enSent, enSent],[0.25, 0.25, 0.25, 0.25]) print('==================================')