def save_value(self, path, text, coverage_list, relative_matrix, clues_list, entities_list):
    """Store the four analysis results for one text as a single-entry dict.

    The entry's key is the items of ``text`` joined with the ``'#$#'``
    separator; its value is the list
    ``[coverage_list, relative_matrix, clues_list, entities_list]``.
    ``ftools.check_filename`` is called on ``path`` first, then the dict is
    handed to ``tools.save_object`` together with ``path``.
    """
    ftools.check_filename(path)
    record_key = '#$#'.join(text)
    payload = {
        record_key: [coverage_list, relative_matrix, clues_list, entities_list],
    }
    tools.save_object(payload, path)
def train(traindata, savepath=Dir.res + "/parameter/words_vector/w2v.model"):
    """Fit a Word2Vec model on ``traindata`` and save it to ``savepath``.

    ``ftools.check_filename`` is called on ``savepath`` before training.
    The model is built with fixed settings: size 200, window 5,
    min_count 3 and 4 workers.
    """
    ftools.check_filename(savepath)
    # Fixed hyper-parameters passed straight through to the constructor.
    hyper_params = {
        "size": 200,
        "window": 5,
        "min_count": 3,
        "workers": 4,
    }
    w2v = Word2Vec(sentences=traindata, **hyper_params)
    w2v.save(savepath)
def save_value(self, path, key, coverage_list, relative_matrix, clues_list, entities_list):
    """Store the four analysis results under ``key`` as a single-entry dict.

    The value saved for ``key`` is the list
    ``[coverage_list, relative_matrix, clues_list, entities_list]``.
    ``ftools.check_filename`` is called on ``path`` first, then the dict is
    handed to ``tools.save_object`` together with ``path``.
    """
    ftools.check_filename(path)
    payload = {
        key: [coverage_list, relative_matrix, clues_list, entities_list],
    }
    tools.save_object(payload, path)
def PythonROUGE(guess_summ_list, ref_summ_list, ngram_order=2):
    """Run the ROUGE-1.5.5 Perl script and collect its average scores.

    An evaluation XML file is written to ``Dir.res + '/Temp/temp.xml'``, the
    ROUGE script is run through the shell with its standard output
    redirected to ``Dir.res + '/Temp/ROUGE_result.txt'``, and that file is
    scanned for the ``Average_R`` / ``Average_P`` / ``Average_F`` lines of
    ROUGE-1 up to ROUGE-``ngram_order``. Both temporary files are left on
    disk after the call.

    Args:
        guess_summ_list: Guess summaries, one entry per evaluation. A single
            string is accepted and treated as a one-element list.
        ref_summ_list: References indexed in parallel with
            ``guess_summ_list``; entry ``i`` is passed to ``create_xml``
            together with guess ``i``.
        ngram_order: Highest n-gram order requested from ROUGE (``-n``) and
            parsed from its output.

    Returns:
        A tuple ``(recall_list, precision_list, F_measure_list)``. Each list
        holds one float per matching output line, ordered by increasing n.
        A list is empty when the output contains no matching line.
    """
    # A bare string is a single guess summary: wrap the guess itself (the
    # previous code wrapped ref_summ_list here by mistake) so the loop below
    # iterates over summaries rather than over characters.
    if isinstance(guess_summ_list, str):
        guess_summ_list = [guess_summ_list]

    # Location of the ROUGE distribution and its data directory.
    ROUGE_path = Dir.res + "/RELEASE-1.5.5/ROUGE-1.5.5.pl"
    data_path = Dir.res + "/RELEASE-1.5.5/data"

    # Options used to call ROUGE; edit here to call it differently.
    options = '-a -t 1 -m -l 140 -n ' + str(ngram_order)

    # Temporary XML file describing the evaluations in ROUGE's input format.
    xml_path = Dir.res + '/Temp/temp.xml'
    tools.check_filename(xml_path)
    # The context manager closes the file even if create_xml or the
    # ref_summ_list lookup raises part-way through.
    with open(xml_path, 'w') as xml_file:
        xml_file.write('<ROUGE-EVAL version="1.0">\n')
        for guess_summ_index, guess_summ_file in enumerate(guess_summ_list):
            xml_file.write('<EVAL ID="' + str(guess_summ_index + 1) + '">\n')
            create_xml(xml_file, guess_summ_file, ref_summ_list[guess_summ_index])
            xml_file.write('</EVAL>\n')
        xml_file.write('</ROUGE-EVAL>\n')

    # File that receives ROUGE's standard output.
    ROUGE_output_path = Dir.res + '/Temp/ROUGE_result.txt'

    # NOTE: the command is assembled by plain string concatenation and run
    # through the shell, so paths containing spaces or shell metacharacters
    # will not survive. The exit status is ignored, as before.
    exec_command = (ROUGE_path + ' -e ' + data_path + ' ' + options +
                    ' -x ' + xml_path + ' > ' + ROUGE_output_path)
    os.system(exec_command)

    recall_list = []
    precision_list = []
    F_measure_list = []

    # Read the output once instead of rewinding the file for every n.
    with open(ROUGE_output_path, 'r') as ROUGE_output_file:
        output_lines = ROUGE_output_file.readlines()

    for n in range(1, ngram_order + 1):
        prefix = 'X ROUGE-' + str(n)
        # Compile the three patterns for this n once, not once per line.
        targets = (
            (re.compile(prefix + ' Average_R: ([0-9.]+)'), recall_list),
            (re.compile(prefix + ' Average_P: ([0-9.]+)'), precision_list),
            (re.compile(prefix + ' Average_F: ([0-9.]+)'), F_measure_list),
        )
        for line in output_lines:
            for pattern, scores in targets:
                match = pattern.findall(line)
                if match:
                    scores.append(float(match[0]))

    return (recall_list, precision_list, F_measure_list)