def bleu_evaluation(self, candidate):
    """Score ``candidate`` with BLEU against the stored reference resumes.

    Two independent scores are produced: one against the abstractive
    resumes named in ``self.resume_abstractive`` and one against the
    extractive resumes named in ``self.resume_extractive``. Each name is
    turned into a file path, read with ``tools.text_to_string`` and
    tokenized with ``tools.tokenize``; the resulting token lists are
    passed as the references to ``sentence_bleu``.

    :param candidate: the hypothesis handed to ``sentence_bleu`` as its
        second argument (presumably a list of tokens -- confirm against
        the caller).
    :return: a ``(abstractive_score, extractive_score)`` tuple. An entry
        is ``None`` when the corresponding list of resume names is empty.
    """
    scores = [None, None]

    # Abstractive references: only evaluated when at least one name exists.
    if self.resume_abstractive:
        abstractive_refs = []
        for name in self.resume_abstractive:
            raw_text = tools.text_to_string(
                './manual_resume_abstractive/' + name + '.txt',
                is_resume_abstract=True)
            abstractive_refs.append(tools.tokenize(raw_text))
        scores[0] = sentence_bleu(abstractive_refs, candidate)

    # Extractive references: same procedure, different directory and no
    # ``is_resume_abstract`` flag.
    if self.resume_extractive:
        extractive_refs = []
        for name in self.resume_extractive:
            raw_text = tools.text_to_string(
                './axilaury_extractive/' + name + '.txt')
            extractive_refs.append(tools.tokenize(raw_text))
        scores[1] = sentence_bleu(extractive_refs, candidate)

    return tuple(scores)
['meet_116', 'meet_117', 'meet_118', 'meet_119'], ['meet_120', 'meet_121', 'meet_122', 'meet_123'], ['meet_124', 'meet_125', 'meet_126', 'meet_127'], ['meet_128', 'meet_129', 'meet_130', 'meet_131'], ['meet_132', 'meet_133', 'meet_134', 'meet_135'], ['meet_136', 'meet_137', 'meet_138', 'meet_139'], ['meet_140', 'meet_141', 'meet_142', 'meet_143'], ['meet_144', 'meet_145', 'meet_146', 'meet_147'], ['meet_148', 'meet_149', 'meet_150', 'meet_151'] ] meet_list = ["block_" + str(i + 1) for i in xrange(34)] print 'meet_list', meet_list adress_ = "./manual_blocks/" all_documents = [tools.text_to_string(adress_ + i + '.txt') for i in meet_list] example = tf_idf_class(all_documents) tfidf_representation = example.tfidf() counter = 0 for meet in meet_list: print '***********' print 'meet', meet meet_block = classification_scenario_based[counter] file.write(str(meet_block) + '\n') file.write('*****************' + '\n')
# file.flush() # # file.close() file = open('./result/Row_results/result_relative_1.txt', 'w') adress_ = "./manuel_corpus/" file.write( 'this file includes the tf-idf scores for 138 scenario-based meetings' + '\n' + 'For each meeting, the tf-idf scores are computed by comparing this meeting w.r.t other scenario-based ' + '\n' + 'meetings esxcept the meetins with the same order in their bloks.' + '\n') all_documents_group_1 = [ tools.text_to_string(adress_ + i + '.txt') for i in m1.itervalues() ] all_documents_group_2 = [ tools.text_to_string(adress_ + i + '.txt') for i in m2.itervalues() ] all_documents_group_3 = [ tools.text_to_string(adress_ + i + '.txt') for i in m3.itervalues() ] all_documents_group_4 = [ tools.text_to_string(adress_ + i + '.txt') for i in m4.itervalues() if i is not None ] all_documents__1 = all_documents_group_2 + all_documents_group_3 + all_documents_group_4 all_documents__2 = all_documents_group_1 + all_documents_group_3 + all_documents_group_4 all_documents__3 = all_documents_group_2 + all_documents_group_1 + all_documents_group_4
def blocks_4_4_ami(n_words, length=None):
    """Write tf-idf candidates and their BLEU scores for each meeting, block by block.

    For every block (a list of meeting names) taken from the module-level
    ``classification_scenario_based``, the meeting texts are read from
    ``path + '/manual_corpus/'`` and a tf-idf representation is computed
    over that block alone. For each meeting in the block, the candidate
    returned by ``get_best_k_tfidf`` and the two scores returned by
    ``bleu_evaluation`` are written to
    ``path + 'result_ami/result_4_4.txt'``, which is overwritten on every
    call.

    :param n_words: forwarded unchanged as the first argument of
        ``get_best_k_tfidf`` (presumably the number of top tf-idf words to
        keep -- confirm against that method).
    :param length: when ``None``, every block is processed and ``tfidf()``
        is called without arguments. Otherwise only
        ``classification_scenario_based[0:length]`` is processed and
        ``length`` is also forwarded to ``tfidf(length)``.
    :return: None
    """
    # The two modes differ only in which blocks are visited and in the
    # argument handed to ``tfidf``; everything else is shared.
    if length is None:
        blocks = classification_scenario_based
        tfidf_args = ()
    else:
        blocks = classification_scenario_based[0:length]
        tfidf_args = (length,)

    corpus_dir = path + '/manual_corpus/'

    # ``with`` guarantees the result file is closed even when reading a
    # document or scoring a candidate raises.
    with open(path + 'result_ami/result_4_4.txt', 'w') as out:
        for meet_list in blocks:
            out.write('************************' + str(meet_list) +
                      '*************************************\n')
            all_documents = [
                tools.text_to_string(corpus_dir + name + '.txt')
                for name in meet_list
            ]
            tfidf_representation = tf_idf_class(all_documents).tfidf(*tfidf_args)

            # enumerate gives the row of each meeting directly instead of
            # re-searching the list with ``meet_list.index(meet)``.
            for position, meet in enumerate(meet_list):
                out.write(meet + '\n')
                out.write('*****************\n')

                c = compare_with_AMI_results(meet)
                c.get_resumes()
                candidate = c.get_best_k_tfidf(
                    n_words, tfidf_representation[position])
                out.write('candidate = ' + str(candidate) + '\n')
                # Flush before scoring so the candidate is on disk even if
                # the BLEU evaluation fails.
                out.flush()

                bleu_measure = c.bleu_evaluation(candidate)
                out.write('score_abstractive = ' + str(bleu_measure[0]) + '\n')
                out.write('score_extractive = ' + str(bleu_measure[1]) + '\n')
                out.write('\n')
                out.flush()
    return
def blocks_ami(n_words, length=None):
    """Write tf-idf candidates and their BLEU scores for whole blocks of meetings.

    The 34 block documents ``block_1`` .. ``block_34`` are read from
    ``path + "manual_blocks/"`` and one tf-idf representation is computed
    over all of them. For the i-th block, the matching entry
    ``classification_scenario_based[i]`` is handed to
    ``compare_with_AMI_results``; the candidate returned by
    ``get_best_k_tfidf`` and the two scores returned by
    ``bleu_evaluation`` are written to
    ``path + 'result_ami/result_4_block_scen.txt'``, which is overwritten
    on every call.

    :param n_words: forwarded unchanged as the first argument of
        ``get_best_k_tfidf`` (presumably the number of top tf-idf words to
        keep -- confirm against that method).
    :param length: when ``None``, all 34 blocks are processed and
        ``tfidf()`` is called without arguments. Otherwise only the first
        ``length`` blocks (``[0:length]`` slice) are processed and
        ``length`` is also forwarded to ``tfidf(length)``. All 34
        documents are loaded in both modes.
    :return: None
    """
    # ``with`` guarantees the result file is closed even when reading a
    # document or scoring a candidate raises.
    with open(path + 'result_ami/result_4_block_scen.txt', 'w') as out:
        blocks_dir = path + "manual_blocks/"
        # ``range`` behaves the same as ``xrange`` here and exists on both
        # Python 2 and Python 3.
        block_names = ["block_" + str(i + 1) for i in range(34)]
        all_documents = [
            tools.text_to_string(blocks_dir + name + '.txt')
            for name in block_names
        ]
        example = tf_idf_class(all_documents)

        # The two modes differ only in how many blocks are visited and in
        # the argument handed to ``tfidf``.
        if length is None:
            tfidf_representation = example.tfidf()
            selected = block_names
        else:
            tfidf_representation = example.tfidf(length)
            selected = block_names[0:length]

        # Only the position matters inside the loop: it selects both the
        # group of meetings and the tf-idf row for that block.
        for counter, _ in enumerate(selected):
            meet_block = classification_scenario_based[counter]
            out.write(str(meet_block) + '\n')
            out.write('*****************' + '\n')
            out.flush()

            c = compare_with_AMI_results(meet_block)
            c.get_resumes()
            candidate = c.get_best_k_tfidf(
                n_words, tfidf_representation[counter])
            out.write('candidate = ' + str(candidate) + '\n')
            # Flush before scoring so the candidate is on disk even if the
            # BLEU evaluation fails.
            out.flush()

            bleu_measure = c.bleu_evaluation(candidate)
            out.write('score_abstractive = ' + str(bleu_measure[0]) + '\n')
            out.write('score_extractive = ' + str(bleu_measure[1]) + '\n')
            out.write('\n')
            out.flush()
['meet_116', 'meet_117', 'meet_118', 'meet_119'], ['meet_120', 'meet_121', 'meet_122', 'meet_123'], ['meet_124', 'meet_125', 'meet_126', 'meet_127'], ['meet_128', 'meet_129', 'meet_130', 'meet_131'], ['meet_132', 'meet_133', 'meet_134', 'meet_135'], ['meet_136', 'meet_137', 'meet_138', 'meet_139'], ['meet_140', 'meet_141', 'meet_142', 'meet_143'], ['meet_144', 'meet_145', 'meet_146', 'meet_147'], ['meet_148', 'meet_149', 'meet_150', 'meet_151'] ] for meet_list in classification_scenario_based: file.write('************************' + str(meet_list) + '*************************************\n') all_documents = [ tools.text_to_string(adress + i + '.txt') for i in meet_list ] example = tf_idf_class(all_documents) # , document_0) tfidf_representation = example.tfidf() for meet in meet_list: file.write(meet + '\n') file.write('*****************\n') c = compare_with_AMI_results(meet) c.get_resumes() candidate = c.get_best_k_tfidf( 20, tfidf_representation[meet_list.index(meet)]) file.write('candidate = ' + str(candidate) + '\n')