Ejemplo n.º 1
0
 def save_value(self, path, text, coverage_list, relative_matrix,
                clues_list, entities_list):
     """Store the four per-text results in a one-entry dict saved at *path*.

     The dict key is the items of *text* joined with the ``'#$#'``
     separator; the value is the list
     ``[coverage_list, relative_matrix, clues_list, entities_list]``.
     ``ftools.check_filename(path)`` is called before anything is written
     (presumably to prepare the target location -- confirm in ftools).
     """
     ftools.check_filename(path)
     record_key = '#$#'.join(text)
     payload = {
         record_key: [
             coverage_list,
             relative_matrix,
             clues_list,
             entities_list,
         ],
     }
     tools.save_object(payload, path)
Ejemplo n.º 2
0
def train(traindata, savepath=Dir.res + "/parameter/words_vector/w2v.model"):
    """Fit a Word2Vec model on *traindata* and save it to *savepath*.

    ``ftools.check_filename(savepath)`` is called first (presumably to
    prepare the target location -- confirm in ftools). The model is built
    with fixed hyper-parameters: size=200, window=5, min_count=3, workers=4.
    """
    ftools.check_filename(savepath)
    # Hyper-parameters are kept together so they are easy to spot and tweak.
    hyper_params = dict(size=200, window=5, min_count=3, workers=4)
    w2v = Word2Vec(sentences=traindata, **hyper_params)
    w2v.save(savepath)
Ejemplo n.º 3
0
 def save_value(self, path, key, coverage_list, relative_matrix, clues_list,
                entities_list):
     """Save ``{key: [coverage, relative, clues, entities]}`` to *path*.

     The stored object is a dict with exactly one entry: *key* mapped to
     the four result objects, in the order they appear in the signature.
     ``ftools.check_filename(path)`` is called before saving (presumably
     to prepare the target location -- confirm in ftools).
     """
     ftools.check_filename(path)
     bundled_results = [
         coverage_list,
         relative_matrix,
         clues_list,
         entities_list,
     ]
     tools.save_object({key: bundled_results}, path)
Ejemplo n.º 4
0
def PythonROUGE(guess_summ_list, ref_summ_list, ngram_order=2):
    """Run the ROUGE-1.5.5 Perl script and return its averaged scores.

    The function writes an XML description of the evaluation to
    ``Dir.res + '/Temp/temp.xml'``, runs the ROUGE script through the shell
    with its output redirected to ``Dir.res + '/Temp/ROUGE_result.txt'``,
    and then parses that output. Both temporary files are left on disk.

    Args:
        guess_summ_list: List of candidate-summary entries; each one is
            passed unchanged to ``create_xml`` (presumably file paths,
            judging by the original variable names -- confirm against
            ``create_xml``). A single ``str`` is also accepted and is
            treated as a one-element list.
        ref_summ_list: Sequence indexed in parallel with
            ``guess_summ_list``; ``ref_summ_list[i]`` is handed to
            ``create_xml`` together with the i-th guess.
        ngram_order: Highest n-gram order. It is passed to ROUGE's ``-n``
            option, and scores for ROUGE-1 .. ROUGE-``ngram_order`` are
            collected.

    Returns:
        A tuple ``(recall_list, precision_list, F_measure_list)`` of float
        lists. Each list holds, for n = 1 .. ``ngram_order`` in that order,
        every ``X ROUGE-n Average_R`` / ``Average_P`` / ``Average_F`` value
        found in the ROUGE output.
    """
    # A bare string is wrapped so the loop below sees one guess entry.
    # (The earlier code wrapped ref_summ_list here by mistake, which made
    # the references be evaluated as if they were the candidate summary.)
    if isinstance(guess_summ_list, str):
        guess_summ_list = [guess_summ_list]

    # Location of the ROUGE distribution.
    ROUGE_path = Dir.res + "/RELEASE-1.5.5/ROUGE-1.5.5.pl"
    data_path = Dir.res + "/RELEASE-1.5.5/data"

    # Options used to call ROUGE; edit here to run ROUGE differently.
    options = '-a -t 1 -m -l 140 -n ' + str(ngram_order)

    # Temporary XML file describing the evaluation in ROUGE's input format.
    xml_path = Dir.res + '/Temp/temp.xml'
    tools.check_filename(xml_path)
    # The context manager guarantees the file is flushed and closed before
    # ROUGE reads it, even if create_xml raises part-way through.
    with open(xml_path, 'w') as xml_file:
        xml_file.write('<ROUGE-EVAL version="1.0">\n')
        for guess_summ_index, guess_summ_file in enumerate(guess_summ_list):
            xml_file.write('<EVAL ID="' + str(guess_summ_index + 1) + '">\n')
            create_xml(xml_file, guess_summ_file,
                       ref_summ_list[guess_summ_index])
            xml_file.write('</EVAL>\n')
        xml_file.write('</ROUGE-EVAL>\n')

    # File that receives ROUGE's standard output.
    ROUGE_output_path = Dir.res + '/Temp/ROUGE_result.txt'

    # Run ROUGE itself. The command is a plain shell string, so the paths
    # above must not contain spaces or shell metacharacters.
    exec_command = (ROUGE_path + ' -e ' + data_path + ' ' + options +
                    ' -x ' + xml_path + ' > ' + ROUGE_output_path)
    os.system(exec_command)

    # Read the (small) report once instead of re-scanning the file per n.
    with open(ROUGE_output_path, 'r') as ROUGE_output_file:
        output_lines = ROUGE_output_file.readlines()

    recall_list = []
    precision_list = []
    F_measure_list = []
    # Metric letter as printed by ROUGE -> list that collects its values.
    collectors = (
        ('R', recall_list),
        ('P', precision_list),
        ('F', F_measure_list),
    )
    for n in range(1, ngram_order + 1):
        # Compile the three patterns for this order once, not once per line.
        compiled = [
            (re.compile(r'X ROUGE-%d Average_%s: ([0-9.]+)' % (n, metric)),
             target)
            for metric, target in collectors
        ]
        for line in output_lines:
            for pattern, target in compiled:
                found = pattern.search(line)
                if found is not None:
                    target.append(float(found.group(1)))

    return (recall_list, precision_list, F_measure_list)