def get_bleu_similarity(reference_answers, student_answer):
    """Return the BLEU score of a student answer against reference answers.

    Every answer is split on whitespace and each token is replaced by its
    Porter stem (converted with ``str``) before scoring.

    Args:
        reference_answers: iterable of reference answer strings.
        student_answer: the answer string to be scored.

    Returns:
        Whatever ``bleu`` returns for the stemmed candidate tokens, the
        list of stemmed reference token lists and the weights.
    """
    stem = PorterStemmer().stem

    # Build real lists instead of relying on map(): on Python 3 map() is
    # lazy, so the scorer would get one-shot iterators without len().
    # On Python 2 the resulting lists are identical to what map() produced.
    reference_answers_tokens = [
        [str(stem(token)) for token in answer.split()]
        for answer in reference_answers
    ]
    student_tokens = [str(stem(token)) for token in student_answer.split()]

    # NOTE(review): two weights presumably restrict scoring to unigrams and
    # bigrams; they sum to 0.5 rather than 1 -- confirm this is intended.
    weights = [0.25, 0.25]

    # Candidate first, then references, exactly as the original call did.
    # NOTE(review): confirm this matches the signature of the imported bleu.
    return bleu(student_tokens, reference_answers_tokens, weights)
def get_bleu_similarity(reference_answers, student_answer):
    """Score a student answer against reference answers with BLEU.

    Both the student answer and every reference answer are tokenized with
    ``str.split()`` and each token is replaced by ``str()`` of its Porter
    stem before the score is computed.

    Args:
        reference_answers: iterable of reference answer strings.
        student_answer: the answer string to be scored.

    Returns:
        The value returned by ``bleu`` for the stemmed candidate tokens, the
        stemmed reference token lists and the weights.
    """
    porter_stemmer = PorterStemmer()

    def _stem_tokens(text):
        # A list comprehension always yields a concrete list. The previous
        # map() call is lazy on Python 3, which would hand the scorer an
        # iterator without len(); on Python 2 the result is unchanged.
        return [str(porter_stemmer.stem(word)) for word in text.split()]

    reference_answers_tokens = [
        _stem_tokens(answer) for answer in reference_answers
    ]
    student_tokens = _stem_tokens(student_answer)

    # NOTE(review): only two weights are given and they sum to 0.5, not 1;
    # presumably unigram + bigram precision -- confirm this is intended.
    weights = [0.25, 0.25]

    # Argument order (candidate, references, weights) is kept from the
    # original call. NOTE(review): verify against the imported bleu.
    return bleu(student_tokens, reference_answers_tokens, weights)
def runAndSave(self, args):
    """Score every row with the given parameters and write two CSV reports.

    This method was built upon the normal 'run' method but was used for
    generating the final results. The team names are hard-coded to match
    the columns of the CSV files, so treat it as an example of how to
    perform a detailed analysis rather than as a general-purpose routine.

    Args:
        args: indexable sequence; entries 0..8 are stored as scoring
            parameters (see the assignments at the top of the body).

    Side effects:
        Writes 'ourPessimisticRankingDiff.csv' and
        'bleuPessimisticRankingDiff.csv' relative to the current working
        directory and prints every row index while processing. Each file
        starts with a header row of team names, followed by one row per
        data row holding the per-team values returned by
        ``self.evaluator.diffAll``; cells without a value contain '-'.
    """
    # Scoring parameters, taken positionally from args.
    # NOTE(review): the first two are set on self.pen, the rest on self.
    self.pen.score_for_matched_lexical = args[0]
    self.pen.score_for_matched_synonym = args[1]
    self.factor_word_offset_penalty = args[2]
    self.factor_sentence_length_mismatch = args[3]
    self.factor_name_mismatch = args[4]
    self.factor_fe_offset_penalty = args[5]
    self.weight_target_frame_element = args[6]
    self.weight_frame_elements = args[7]
    self.factor_frame_offset_penalty = args[8]

    # CSV column order; a name's position in this tuple is its column index.
    reference_name = "newstest2014-deen-ref.de-en"
    column_names = (
        "newstest2014.CMU.3461.de-en",
        "newstest2014.DCU-ICTCAS-Tsinghua-L.3444.de-en",
        "newstest2014.LIMSI-KIT-Submission.3359.de-en",
        "newstest2014.RWTH-primary.3266.de-en",
        "newstest2014.eubridge.3569.de-en",
        "newstest2014.kit.3109.de-en",
        "newstest2014.onlineA.0.de-en",
        "newstest2014.onlineB.0.de-en",
        "newstest2014.onlineC.0.de-en",
        "newstest2014.rbmt1.0.de-en",
        "newstest2014.rbmt4.0.de-en",
        "newstest2014.uedin-syntax.3035.de-en",
        "newstest2014.uedin-wmt14.3025.de-en",
        reference_name,
    )
    team_to_row = dict(
        (name, column) for column, name in enumerate(column_names))
    width = len(team_to_row)

    def blank_row():
        # Fresh row in which every cell holds the '-' placeholder.
        return ['-'] * width

    def ranked_row(results):
        # Place each value returned by diffAll in its team's column.
        # `row` is the loop variable of the enclosing row loop below.
        cells = blank_row()
        for name, rank in self.evaluator.diffAll(row, results).iteritems():
            cells[team_to_row[name]] = rank
        return cells

    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open('ourPessimisticRankingDiff.csv', 'wb') as our_csvfile:
        with open('bleuPessimisticRankingDiff.csv', 'wb') as bleu_csvfile:
            ourwriter = csv.writer(our_csvfile)
            bleuwriter = csv.writer(bleu_csvfile)

            # Header: every team name in its column; the reference column
            # keeps the '-' placeholder.
            header = blank_row()
            for name, column in team_to_row.items():
                if name != reference_name:
                    header[column] = name
            ourwriter.writerow(header)
            bleuwriter.writerow(list(header))

            for row in range(self.data.get_number_of_rows()):
                print(row)
                ref_sentence = self.data.get_row(row)[self.data.get_gold()]

                our_results = {}
                bleu_results = {}
                team_sentences = self.data.get_row_for_teams(
                    self.evaluator.get_teams(row), row)
                for team, team_sentence in team_sentences.iteritems():
                    our_results[team] = self.get_sentence_score(
                        ref_sentence, team_sentence)
                    # NOTE(review): the reference is passed as one flat
                    # token list; confirm bleu_score.bleu expects that
                    # rather than a list of reference token lists.
                    bleu_results[team] = bleu_score.bleu(
                        self.data.get_sentence_for_object(
                            team_sentence).split(),
                        self.data.get_sentence_for_object(
                            ref_sentence).split(),
                        [1])

                our_print_res = ranked_row(our_results)
                bleu_print_res = ranked_row(bleu_results)
                ourwriter.writerow(our_print_res)
                bleuwriter.writerow(bleu_print_res)