def print_for_rouge(self, decoded_sents, ref_sents, corpus="giga"):
    assert len(decoded_sents) == len(ref_sents)
    ref_dir = os.path.join(eval_config.save_model_path, 'reference')
    cand_dir = os.path.join(eval_config.save_model_path, 'candidate')
    if not os.path.exists(ref_dir):
        os.mkdir(ref_dir)
    if not os.path.exists(cand_dir):
        os.mkdir(cand_dir)
    if corpus == "giga":
        # Gigaword: a single reference per example.
        for i in range(len(ref_sents)):
            with codecs.open(ref_dir + "/%06d_reference.txt" % i, 'w+', 'utf-8') as f:
                f.write(ref_sents[i])
            with codecs.open(cand_dir + "/%06d_candidate.txt" % i, 'w+', 'utf-8') as f:
                f.write(decoded_sents[i])
        r = pyrouge.Rouge155()
        r.model_filename_pattern = '#ID#_reference.txt'
        r.system_filename_pattern = r'(\d+)_candidate.txt'
    else:
        # Multiple references per example, one file each, prefixed A-D.
        nickname = ['A', 'B', 'C', 'D']
        for i in range(len(ref_sents)):
            for task in range(len(ref_sents[0])):
                ref_file_name = nickname[task] + ".%06d_reference.txt" % i
                with codecs.open(os.path.join(ref_dir, ref_file_name), 'w+', 'utf-8') as f:
                    f.write(ref_sents[i][task].strip().lower())
            with codecs.open(cand_dir + "/%06d_candidate.txt" % i, 'w+', 'utf-8') as f:
                f.write(decoded_sents[i])
        r = pyrouge.Rouge155()
        r.model_filename_pattern = '[A-Z].#ID#_reference.txt'
        r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    print("F_measure: %s Recall: %s Precision: %s\n"
          % (str(f_score), str(recall), str(precision)))
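# Note on the pyrouge filename patterns used throughout these snippets: the
# capture group in system_filename_pattern, e.g. r'(\d+)_candidate.txt',
# extracts an ID from each candidate file name, and '#ID#' in
# model_filename_pattern is then substituted with that ID to locate the
# matching reference file(s). A character class such as '[A-Z]' in the model
# pattern lets several reference files (the A-D prefixes above) share one
# candidate ID, which is how multi-reference corpora are scored.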
def PERLROUGE(reference, generated, stem):
    rouge_dir = join(dirname(abspath(__file__)), "ROUGE-1.5.5")
    rouge = pyrouge.Rouge155(rouge_dir)
    # Monkey-patch the private option builder so "-m" (stemming) can be toggled.
    rouge._Rouge155__add_config_option = (
        lambda options: [*options, *(["-m"] if stem else []), rouge._config_file])
    rouge.system_dir = tempfile.mkdtemp("-systems")
    rouge.model_dir = tempfile.mkdtemp("-models")
    with open(join(rouge.system_dir, "1.txt"), "w", encoding="utf-8") as f:
        f.write(depunct.sub(" ", generated))
    with open(join(rouge.model_dir, "1.txt"), "w", encoding="utf-8") as f:
        f.write(depunct.sub(" ", reference))
    rouge.system_filename_pattern = r"(\d+).txt"
    rouge.model_filename_pattern = "#ID#.txt"
    try:
        result = rouge.output_to_dict(rouge.convert_and_evaluate())
    except Exception:  # a bare except would also swallow KeyboardInterrupt
        result = {}
    return tuple(result.items())
def rouge_eval(ref_dir, dec_dir, l_param=100):
    """Evaluate the files in ref_dir and dec_dir with pyrouge, returning results_dict"""
    r = pyrouge.Rouge155()
    # r.model_filename_pattern = '#ID#_reference.txt'
    r.model_filename_pattern = '#ID#_reference.[A-Z].txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    log.getLogger('global').setLevel(log.WARNING)  # silence pyrouge logging
    rouge_args = [
        '-e', r._data_dir,
        '-c', '95',
        '-2', '4',  # this is the only flag we changed (max skip gap from -1 to 4)
        '-U',
        '-r', '1000',
        '-n', '4',
        '-w', '1.2',
        '-a',
        '-l', str(l_param)]
    rouge_args = ' '.join(rouge_args)
    rouge_results = r.convert_and_evaluate(rouge_args=rouge_args)
    return r.output_to_dict(rouge_results)
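# For reference, the ROUGE-1.5.5 flags assembled above (summarized from the
# ROUGE-1.5.5 usage notes; double-check against your release):
#   -e <dir>  path to the ROUGE data directory (WordNet exceptions, stopwords)
#   -c 95     report 95% confidence intervals
#   -2 4      compute skip-bigram (ROUGE-S) co-occurrence with a max gap of 4
#   -U        also count unigrams in the skip-bigram score (ROUGE-SU)
#   -r 1000   number of bootstrap resamples for the confidence intervals
#   -n 4      compute ROUGE-N up to 4-grams
#   -w 1.2    weighting factor for the weighted LCS score (ROUGE-W)
#   -a        evaluate all systems
#   -l <n>    only use the first <n> words of each summary
# (-b <n>, used in later snippets, truncates to the first <n> bytes instead,
# and -m applies Porter stemming before matching.)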
def run(summ_path, ref_path, rouge_args=None, verbose=False, saveto=None,
        eos=".", ignore_empty=False, stemming=False, to_json=False):
    s = settings.Settings()
    s._load()
    stime = time()
    dirpath = tempfile.mkdtemp()
    sys_root, model_root = [os.path.join(dirpath, _) for _ in ["system", "model"]]
    # print("Preparing documents...", end=" ")
    utils.mkdirs([sys_root, model_root])
    ignored = utils.split_files(model_file=ref_path,
                                system_file=summ_path,
                                model_dir=model_root,
                                system_dir=sys_root,
                                eos=eos,
                                ignore_empty=ignore_empty)
    # print("%d line(s) ignored" % len(ignored))
    # print("Running ROUGE...")
    log_level = logging.ERROR if not verbose else None
    r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                         log_level=log_level)
    r.system_dir = sys_root
    r.model_dir = model_root
    r.system_filename_pattern = r's.(\d+).txt'
    r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
    # logger = logging.getLogger(r.__name__)
    # logger.setLevel(logging.ERROR)
    data_arg = "-e %s" % s.data['ROUGE_data']
    if not rouge_args:
        rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
        if stemming:
            rouge_args.append("-m")
        rouge_args_str = " ".join([str(_) for _ in rouge_args])
    else:
        rouge_args_str = rouge_args
    rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
    output = r.convert_and_evaluate(rouge_args=rouge_args_str)
    if saveto is not None:
        saveto = open(saveto, 'w')
    utils.tee(saveto, output)
    # print("Elapsed time: %.3f seconds" % (time() - stime))
    if to_json:
        return output_to_json(output)
    return output
def main(_):
    rouge = pyrouge.Rouge155()
    rouge.log.setLevel(logging.ERROR)
    rouge.system_filename_pattern = r"rouge.(\d+).txt"
    rouge.model_filename_pattern = "rouge.[A-Z].#ID#.txt"
    tf.logging.set_verbosity(tf.logging.INFO)
    tmpdir = mkdtemp()
    tf.logging.info("tmpdir: %s" % tmpdir)
    # system = decodes/predictions
    system_dir = os.path.join(tmpdir, "system")
    # model = targets/gold
    model_dir = os.path.join(tmpdir, "model")
    os.mkdir(system_dir)
    os.mkdir(model_dir)
    rouge.system_dir = system_dir
    rouge.model_dir = model_dir
    prep_data(rouge.system_dir, rouge.model_dir)
    rouge_scores = rouge.convert_and_evaluate()
    rouge_scores = rouge.output_to_dict(rouge_scores)
    for prefix in ["rouge_1", "rouge_2", "rouge_l"]:
        for suffix in ["f_score", "precision", "recall"]:
            key = "_".join([prefix, suffix])
            tf.logging.info("%s: %.4f" % (key, rouge_scores[key]))
    # clean up after pyrouge
    shutil.rmtree(tmpdir)
    shutil.rmtree(rouge._config_dir)  # pylint: disable=protected-access
    shutil.rmtree(os.path.split(rouge._system_dir)[0])  # pylint: disable=protected-access
def process(data):
    candidates, references, pool_id = data
    cnt = len(candidates)
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = "rouge-tmp-{}-{}".format(current_time, pool_id)
    if not os.path.isdir(tmp_dir):
        os.mkdir(tmp_dir)
        os.mkdir(tmp_dir + "/candidate")
        os.mkdir(tmp_dir + "/reference")
    try:
        for i in range(cnt):
            if len(references[i]) < 1:
                continue
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(candidates[i])
            with open(tmp_dir + "/reference/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(references[i])
        r = pyrouge.Rouge155()
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        # print(rouge_results)
        results_dict = r.output_to_dict(rouge_results)
    finally:
        # Always remove the temporary directory, even if ROUGE fails.
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
    return results_dict
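# A sketch of how process() above might be driven from a worker pool: split
# the corpus into one chunk per worker and average the per-chunk scores. The
# chunking helper, the pool size, and the unweighted averaging are assumptions
# for illustration, not part of the original code.
from multiprocessing import Pool

def chunks(lst, n):
    """Split lst into at most n contiguous, nearly equal slices (assumed helper)."""
    size = max(1, -(-len(lst) // n))  # ceiling division
    return [lst[i:i + size] for i in range(0, len(lst), size)]

def rouge_parallel(candidates, references, n_pool=4):
    args = [(c, r, k) for k, (c, r) in
            enumerate(zip(chunks(candidates, n_pool), chunks(references, n_pool)))]
    with Pool(n_pool) as pool:
        results = pool.map(process, args)
    # unweighted average of one metric across roughly equal-sized chunks
    return sum(d["rouge_1_f_score"] for d in results) / len(results)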
def rouge_eval(ref_dir, dec_dir):
    """Evaluate the files in ref_dir and dec_dir with pyrouge, returning results_dict"""
    r = pyrouge.Rouge155()
    # numbered multiple references share the candidate's ID
    r.model_filename_pattern = r'#ID#_reference_(\d+).txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    rouge_results = r.convert_and_evaluate()
    # return the parsed dict, as the docstring promises
    return r.output_to_dict(rouge_results)
def setup_rouge_perl(rouge_dir, rouge_args, model_dir, system_dir):
    # r = pyrouge.Rouge155(rouge_dir=rouge_dir, rouge_args=rouge_args, log_level=10)
    r = pyrouge.Rouge155(rouge_dir=rouge_dir, rouge_args=rouge_args)
    r.model_dir = model_dir
    r.system_dir = system_dir
    r.system_filename_pattern = r"(\d+).txt"
    r.model_filename_pattern = "#ID#.[A-Z].txt"
    return r
def rouge_eval(ref_dir, dec_dir):
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    return r.output_to_dict(rouge_results)
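# A minimal usage sketch for rouge_eval() above, assuming pyrouge is installed
# and pointed at a working ROUGE-1.5.5 checkout; the directory layout mirrors
# the filename patterns, and the toy sentences are made up for illustration.
import os
import tempfile

base = tempfile.mkdtemp()
ref_dir = os.path.join(base, "reference")
dec_dir = os.path.join(base, "decoded")
os.mkdir(ref_dir)
os.mkdir(dec_dir)
with open(os.path.join(ref_dir, "000000_reference.txt"), "w") as f:
    f.write("police killed the gunman")
with open(os.path.join(dec_dir, "000000_decoded.txt"), "w") as f:
    f.write("the gunman was killed by police")
results_dict = rouge_eval(ref_dir, dec_dir)
print(results_dict["rouge_l_f_score"])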
def rouge(reference, candidate, log_path, print_log, config):
    """
    Compute ROUGE scores with pyrouge.

    :param reference: list of reference token sequences
    :param candidate: list of candidate token sequences
    :param log_path: path under which reference/candidate files are written
    :param print_log: function used to print the log
    :param config: configuration
    :return: ROUGE-1/2/L F-scores, recalls, and precisions, each in percent
    """
    # check that references and candidates are of equal amount
    assert len(reference) == len(candidate)
    # directories for saving sentences
    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    if not os.path.exists(ref_dir):
        os.mkdir(ref_dir)
    if not os.path.exists(cand_dir):
        os.mkdir(cand_dir)
    # write one file per example; the ' <\s> ' token marks sentence breaks
    for i in range(len(reference)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(reference[i]).replace(r' <\s> ', '\n') + '\n')
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(candidate[i]).replace(r' <\s> ', '\n')
                    .replace('<unk>', 'UNK') + '\n')
    # use pyrouge / ROUGE-1.5.5
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    # compute the scores
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    print_log("F_measure: %s Recall: %s Precision: %s\n"
              % (str(f_score), str(recall), str(precision)))
    return f_score[:], recall[:], precision[:]
def rouge(reference, candidate, log_path):
    assert len(reference) == len(candidate)
    ref_dir = log_path + 'internal_tests/reference/'
    cand_dir = log_path + 'internal_tests/candidate/'
    if not os.path.exists(ref_dir):
        os.makedirs(ref_dir)
    if not os.path.exists(cand_dir):
        os.makedirs(cand_dir)
    # read original test file
    # with open('/home/jinhq/parser-20180522/gigaword/SDP_train/test.tgt') as f:
    #     references = f.readlines()
    # with open('/home/jinhanqi/summarization/giga_seq2seq_data/gigaword/train/test.title') as f:
    #     references = f.readlines()
    # with open('giga-test.ids.json') as f:
    #     con = f.readlines()
    # ids = json.loads(con[0])
    for i in range(len(reference)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w', 'utf-8') as f:
            f.write(reference[i])
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w', 'utf-8') as f:
            f.write(candidate[i].replace(r' <\s> ', '\n'))
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    # command = '-e /home/jinhq/RELEASE-1.5.5/data -a -b 75 -n 2 -w 1.2 -m'
    # rouge_results = r.convert_and_evaluate(rouge_args=command)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    print("| ROUGE F_measure: %s Recall: %s Precision: %s\n"
          % (str(f_score), str(recall), str(precision)))
    return f_score[:], recall[:], precision[:]
def rouge_eval(ref_dir, dec_dir):
    """Evaluate the files in ref_dir and dec_dir with pyrouge, returning results_dict"""
    rouge_path = '/home/ddd/project/rouge_files/pyrouge/tools/ROUGE-1.5.5'
    r = pyrouge.Rouge155(rouge_path)
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate(
        rouge_args='-e {}/data -a -2 -1 -c 95 -U -n 2 -w 1.2 -b 75'.format(rouge_path))
    return r.output_to_dict(rouge_results)
def rouge_eval(ref_dir, dec_dir):
    """Evaluate the files in ref_dir and dec_dir with pyrouge, returning results_dict"""
    r = pyrouge.Rouge155(rouge_dir=_ROUGE_PATH)
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    return r.output_to_dict(rouge_results)
def run(summ_path, ref_path, rouge_args=None, verbose=False, saveto=None,
        eos=".", ignore_empty_reference=False, ignore_empty_summary=False,
        stemming=True):
    s = settings.Settings()
    s._load()
    stime = time()
    with tempfile.TemporaryDirectory() as dirpath:
        sys_root, model_root = [
            os.path.join(dirpath, _) for _ in ["system", "model"]
        ]
        print("Preparing documents...", end=" ")
        utils.mkdirs([sys_root, model_root])
        ignored = utils.split_files(
            model_path=ref_path,
            system_path=summ_path,
            model_dir=model_root,
            system_dir=sys_root,
            eos=eos,
            ignore_empty_reference=ignore_empty_reference,
            ignore_empty_summary=ignore_empty_summary)
        print("%d line(s) ignored" % len(ignored))
        print("Running ROUGE...")
        log_level = logging.ERROR if not verbose else None
        r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                             log_level=log_level,
                             stemming=stemming)
        r.system_dir = sys_root
        r.model_dir = model_root
        r.system_filename_pattern = r's.(\d+).txt'
        r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
        data_arg = "-e %s" % s.data['ROUGE_data']
        if not rouge_args:
            rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
            rouge_args_str = " ".join([str(_) for _ in rouge_args])
        else:
            rouge_args_str = rouge_args
        rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
        output = r.convert_and_evaluate(rouge_args=rouge_args_str)
        if saveto is not None:
            saveto = open(saveto, 'w')
        utils.tee(saveto, output)
        print("Elapsed time: %.3f seconds" % (time() - stime))
def rouge(reference, candidate, log_path, print_log, config):
    # print(len(reference), len(candidate), candidate[:5])
    # len_sum = 0
    # for i in range(len(reference)):
    #     len_sum += len(reference[i].split())
    # print(len_sum / len(reference))
    assert len(reference) == len(candidate)
    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    if not os.path.exists(ref_dir):
        os.mkdir(ref_dir)
    if not os.path.exists(cand_dir):
        os.mkdir(cand_dir)
    for i in range(len(reference)):
        with codecs.open(ref_dir + "%06d_reference.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(reference[i]).replace(r' <\s> ', '\n') + '\n')
        with codecs.open(cand_dir + "%06d_candidate.txt" % i, 'w', 'utf-8') as f:
            f.write(" ".join(candidate[i]).replace(r' <\s> ', '\n')
                    .replace('<unk>', 'UNK') + '\n')
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    print_log("F_measure: %s Recall: %s Precision: %s\n"
              % (str(f_score), str(recall), str(precision)))
    return f_score[:], recall[:], precision[:]
def __init__(self, measures={"rouge_1"}, beta=1):
    """
    Initialize the evaluator.

    Args:
        measures (set(str)): ROUGE measures to use when computing scores.
            Defaults to `rouge_1`.
        beta (float): Value controlling the recall/precision trade-off when
            computing F_beta scores. Defaults to 1.
    """
    self.measures = measures
    self.rouge = pyrouge.Rouge155(average="raw", stem=True, ignore_stopwords=True)
    self.beta = beta
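# The beta parameter above implies F-beta scoring from ROUGE recall and
# precision. A hypothetical helper, not part of the original class, computing
# the standard F-beta; it reduces to the usual F1 when beta == 1:
def f_beta(precision, recall, beta=1.0):
    """F_beta = (1 + beta^2) * P * R / (beta^2 * P + R)."""
    denom = beta ** 2 * precision + recall
    return (1 + beta ** 2) * precision * recall / denom if denom > 0 else 0.0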
def rouge_scores(system_dir, model_dir):
    """
    Calculate ROUGE scores (dict) between reference/system summaries and model summaries.

    :param system_dir: directory containing the system summaries
    :param model_dir: directory containing the model summaries
    :return: dict of ROUGE scores
    """
    r = pyrouge.Rouge155()
    r.system_filename_pattern = r'system.(\d+).txt'
    r.model_filename_pattern = 'model.#ID#.txt'
    r.system_dir = system_dir
    r.model_dir = model_dir
    output = r.convert_and_evaluate()
    return r.output_to_dict(output)
def rouge_eval(ref_dir, dec_dir):
    import pyrouge
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    # logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    # parse the ROUGE output once and reuse the dict
    results_dict = r.output_to_dict(rouge_results)
    print(results_dict)
    print('Rouge L F1 ', results_dict['rouge_l_f_score'])
    print('Rouge 1 F1 ', results_dict['rouge_1_f_score'])
    print('Rouge 2 F1 ', results_dict['rouge_2_f_score'])
    return results_dict
def test_rouge(cand_file, gold_file, temp_dir, rouge_dir):
    with open(cand_file, 'r') as f:
        # the candidate file is one long line; split it into sentences on ' . '
        candidates = [line.strip() for line in f.readlines()[0].split(' . ')]
    with open(gold_file, 'r') as f:
        references = [line.strip() for line in f.readlines()]
    clen = len(candidates)
    rlen = len(references)
    if clen > rlen:
        candidates = candidates[:rlen]
        clen = len(candidates)
    print(f'# of sentences in candidate file: {clen}')
    print(f'# of sentences in reference file: {rlen}')
    # assert clen == rlen
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = os.path.join(temp_dir, "rouge-tmp-{}".format(current_time))
    if not os.path.isdir(tmp_dir):
        os.mkdir(tmp_dir)
        os.mkdir(tmp_dir + "/candidate")
        os.mkdir(tmp_dir + "/reference")
    try:
        for i in range(clen):
            # print(i, references[i])
            if len(references[i]) < 1:
                continue
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(candidates[i])
            with open(tmp_dir + "/reference/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(references[i])
        r = pyrouge.Rouge155(rouge_dir)
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        # print(rouge_results)
        results_dict = r.output_to_dict(rouge_results)
    finally:
        # Always remove the temporary directory, even if ROUGE fails.
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
    return results_dict
def print_pyrouge(config):
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    r = pyrouge.Rouge155()
    r.system_filename_pattern = r'(\d+).txt'
    r.model_filename_pattern = '#ID#.txt'
    r.system_dir = config.outdir
    r.model_dir = config.refdir
    rouge_results = r.convert_and_evaluate()
    rouge_dict = r.output_to_dict(rouge_results)
    print(rouge_results)
    return rouge_dict
def rouge_eval(base_path):
    """Evaluate the reference/decoded files under base_path with pyrouge and log the results_dict"""
    ref_dir = os.path.join(base_path, "reference")
    dec_dir = os.path.join(base_path, "decoded")
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_dir
    r.system_dir = dec_dir
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    results_dict = r.output_to_dict(rouge_results)
    rouge_log(results_dict, base_path)
def rouge_eval(ref_str, pred_str):
    """Write the reference/prediction strings to files and evaluate them with pyrouge, returning results_dict"""
    for i in range(len(ref_str)):
        with open(ref_folder + str(i) + '_reference.txt', 'w') as f_out:
            f_out.write(ref_str[i])
        with open(pred_folder + str(i) + '_decoded.txt', 'w') as f_out:
            f_out.write(pred_str[i])
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_decoded.txt'
    r.model_dir = ref_folder
    r.system_dir = pred_folder
    logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
    rouge_results = r.convert_and_evaluate()
    return r.output_to_dict(rouge_results)
def run(summ_path, ref_path, rouge_args=None, verbose=False, saveto=None, eos="."):
    if saveto is not None:
        saveto = open(saveto, 'w')
    s = settings.Settings()
    s._load()
    stime = time()
    dirpath = tempfile.mkdtemp()
    sys_root, model_root = [
        os.path.join(dirpath, _) for _ in ["system", "model"]
    ]
    print("Preparing documents...")
    utils.mkdirs([sys_root, model_root])
    utils.split_files(model_file=ref_path,
                      system_file=summ_path,
                      model_dir=model_root,
                      system_dir=sys_root,
                      eos=eos)
    print("Running ROUGE...")
    log_level = logging.ERROR if not verbose else None
    r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                         log_level=log_level)
    r.system_dir = sys_root
    r.model_dir = model_root
    r.system_filename_pattern = r's.(\d+).txt'
    r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
    data_arg = "-e %s" % s.data['ROUGE_data']
    if not rouge_args:
        rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
        rouge_args_str = " ".join([str(_) for _ in rouge_args])
    else:
        rouge_args_str = rouge_args
    rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
    output = r.convert_and_evaluate(rouge_args=rouge_args_str)
    utils.tee(saveto, output)
    print("Elapsed time: %.3f seconds" % (time() - stime))
def test_rouge(cand_file, ref_file):
    f_cand = open(cand_file, encoding="utf-8")
    f_ref = open(ref_file, encoding="utf-8")
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = ".rouge-tmp-{}".format(current_time)
    print('saving in ' + tmp_dir)
    try:
        if not os.path.isdir(tmp_dir):
            os.mkdir(tmp_dir)
            os.mkdir(tmp_dir + "/candidate")
            os.mkdir(tmp_dir + "/reference")
        candidates = [line.strip() for line in f_cand]
        references = [line.strip() for line in f_ref]
        assert len(candidates) == len(references)
        cnt = len(candidates)
        for i in range(cnt):
            # Map every token to a numeric id so ROUGE compares tokens as
            # opaque symbols, presumably to sidestep tokenization issues in
            # the Perl script.
            vocab = set(candidates[i].split(' ') + references[i].split(' '))
            vocab = dict([(w, str(i)) for (i, w) in enumerate(vocab)])
            if len(references[i]) < 1:
                continue
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                cand_i = ' '.join([vocab[c] for c in candidates[i].split(' ')])
                f.write(cand_i)
            with open(tmp_dir + "/reference/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                ref_i = ' '.join([vocab[r] for r in references[i].split(' ')])
                f.write(ref_i)
        f_cand.close()
        f_ref.close()
        r = pyrouge.Rouge155()
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        print(rouge_results)
        results_dict = r.output_to_dict(rouge_results)
        return results_dict
    finally:
        # The finally block runs before the return above completes,
        # so the temporary files are always cleaned up.
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
def run_p2r(summ_path, ref_path, rouge_args=None, verbose=False, saveto=None,
            eos=".", ignore_empty=False, stemming=False):
    s = f2r.settings.Settings()
    s._load()
    dirpath = tempfile.mkdtemp()
    sys_root, model_root = [
        os.path.join(dirpath, _) for _ in ["system", "model"]
    ]
    print("Preparing documents...", end=" ")
    f2r.utils.mkdirs([sys_root, model_root])
    ignored = f2r.utils.split_files(model_file=ref_path,
                                    system_file=summ_path,
                                    model_dir=model_root,
                                    system_dir=sys_root,
                                    eos=eos,
                                    ignore_empty=ignore_empty)
    log_level = logging.ERROR if not verbose else None
    r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
                         log_level=log_level)
    r.system_dir = sys_root
    r.model_dir = model_root
    r.system_filename_pattern = r's.(\d+).txt'
    r.model_filename_pattern = 'm.[A-Z].#ID#.txt'
    data_arg = "-e %s" % s.data['ROUGE_data']
    if not rouge_args:
        rouge_args = ['-c', 95, '-r', 1000, '-n', 2, '-a']
        if stemming:
            rouge_args.append("-m")
        rouge_args_str = " ".join([str(_) for _ in rouge_args])
    else:
        rouge_args_str = rouge_args
    rouge_args_str = "%s %s" % (data_arg, rouge_args_str)
    output = r.convert_and_evaluate(rouge_args=rouge_args_str)
    return output
def calc_rouge_score(candidate, reference, log_dir):
    assert len(reference) == len(candidate)
    ref_dir = os.path.join(log_dir, 'reference')
    cand_dir = os.path.join(log_dir, 'candidate')
    if not os.path.exists(ref_dir):
        os.mkdir(ref_dir)
    if not os.path.exists(cand_dir):
        os.mkdir(cand_dir)
    for i in range(len(reference)):
        with open(os.path.join(ref_dir, "%06d_reference.txt" % i), 'w',
                  encoding='utf-8') as f:
            f.write(reference[i] + '\n')
        with open(os.path.join(cand_dir, "%06d_candidate.txt" % i), 'w',
                  encoding='utf-8') as f:
            f.write(candidate[i] + '\n')
    # rouge_args = '-c 95 -U -r 1 -n 2 -a'
    r = pyrouge.Rouge155()  # rouge_args=rouge_args
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    # ROUGE-1/2/4/L, each as a percentage rounded to two decimals
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_4_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_4_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_4_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    result = "F_measure: {0} Recall: {1} Precision: {2}\n".format(
        str(f_score), str(recall), str(precision))
    # print(result)
    output_dicts = {'score': {'rouge': f_score[1]},  # f_score[1] is ROUGE-2 F1
                    'logging': result}
    return output_dicts
def compute_rouge(x):
    system, reference = x
    rouge = pyrouge.Rouge155()
    # this doesn't disable the logging from perl
    rouge.log = logging.getLogger("pyrouge")
    rouge.log.setLevel(logging.ERROR)
    # fmt: off
    with tempfile.TemporaryDirectory("-system") as system_folder, \
            tempfile.TemporaryDirectory("-reference") as reference_folder:
        # fmt: on
        rouge.system_dir = system_folder
        rouge.model_dir = reference_folder
        rouge.system_filename_pattern = r"(\d+).txt"
        rouge.model_filename_pattern = "#ID#.txt"
        # remove non-alphabetic and non-numeric characters
        system = depunct.sub(" ", system)
        reference = depunct.sub(" ", reference)
        with open(path.join(rouge.system_dir, "1.txt"), "w", encoding="utf-8") as f:
            f.write(system)
        with open(path.join(rouge.model_dir, "1.txt"), "w", encoding="utf-8") as f:
            f.write(reference)
        output = rouge.convert_and_evaluate()
        result = rouge.output_to_dict(output)
    # the context managers close and delete the folders with their contents
    cols = []
    for metric in ["recall", "precision", "f_score"]:
        for n in ["1", "2", "l"]:
            col = "rouge_%s_%s" % (n, metric)
            cols.append(col)
    result = {c: result[c] for c in cols}
    return result
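# compute_rouge() above scores one (system, reference) pair, so a corpus can
# be scored with an ordinary process pool. This driver and the macro-averaging
# are an assumed usage pattern, not part of the original code.
from multiprocessing import Pool

def corpus_rouge(systems, references, workers=4):
    with Pool(workers) as pool:
        per_pair = pool.map(compute_rouge, list(zip(systems, references)))
    # macro-average each reported metric over all pairs
    return {k: sum(d[k] for d in per_pair) / len(per_pair)
            for k in per_pair[0]}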
def test_rouge(cand_file, ref_file, idx=False):
    f_cand = open(cand_file, encoding="utf-8")
    f_ref = open(ref_file, encoding="utf-8")
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = ".rouge-tmp-{}".format(current_time)
    try:
        if not os.path.isdir(tmp_dir):
            os.mkdir(tmp_dir)
            os.mkdir(tmp_dir + "/candidate")
            os.mkdir(tmp_dir + "/reference")
        candidates = [line.strip() for line in f_cand]
        references = [line.strip() for line in f_ref]
        if idx:
            candidates, references = turn2idx(candidates, references)
        assert len(candidates) == len(references)
        cnt = len(candidates)
        for i in range(cnt):
            if len(references[i]) < 1:
                continue
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(candidates[i])
            with open(tmp_dir + "/reference/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(references[i])
        f_cand.close()
        f_ref.close()
        r = pyrouge.Rouge155()
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        results_dict = r.output_to_dict(rouge_results)
        print(">> ROUGE(1/2/3/L/SU4): {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}".format(
            results_dict["rouge_1_f_score"] * 100,
            results_dict["rouge_2_f_score"] * 100,
            results_dict["rouge_3_f_score"] * 100,
            results_dict["rouge_l_f_score"] * 100,
            results_dict["rouge_su*_f_score"] * 100))
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
def test_rouge(temp_dir, cand, ref):
    candidates = [line.strip() for line in open(cand, encoding='utf-8')]
    references = [line.strip() for line in open(ref, encoding='utf-8')]
    print(len(candidates))
    print(len(references))
    assert len(candidates) == len(references)
    cnt = len(candidates)
    current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    tmp_dir = os.path.join(temp_dir, "rouge-tmp-{}".format(current_time))
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)
        os.makedirs(tmp_dir + "/candidate")
        os.makedirs(tmp_dir + "/reference")
    try:
        for i in range(cnt):
            if len(references[i]) < 1:
                continue
            with open(tmp_dir + "/candidate/cand.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(candidates[i])
            with open(tmp_dir + "/reference/ref.{}.txt".format(i), "w",
                      encoding="utf-8") as f:
                f.write(references[i])
        r = pyrouge.Rouge155(temp_dir=temp_dir)
        r.model_dir = tmp_dir + "/reference/"
        r.system_dir = tmp_dir + "/candidate/"
        r.model_filename_pattern = 'ref.#ID#.txt'
        r.system_filename_pattern = r'cand.(\d+).txt'
        rouge_results = r.convert_and_evaluate()
        print(rouge_results)
        results_dict = r.output_to_dict(rouge_results)
    finally:
        # Always remove the temporary directory, even if ROUGE fails.
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir)
    return results_dict
def cal_rouge():
    log_path = '/home/jinhq/fairseq-master/checkpoints/'
    ref_dir = log_path + 'internal_tests/reference/'
    cand_dir = log_path + 'internal_tests/candidate/'
    r = pyrouge.Rouge155()
    r.model_filename_pattern = '#ID#_reference.txt'
    r.system_filename_pattern = r'(\d+)_candidate.txt'
    r.model_dir = ref_dir
    r.system_dir = cand_dir
    logging.getLogger('global').setLevel(logging.WARNING)
    # command = '-e /home/jinhq/RELEASE-1.5.5/data -a -b 75 -n 2 -w 1.2 -m'
    # rouge_results = r.convert_and_evaluate(rouge_args=command)
    rouge_results = r.convert_and_evaluate()
    scores = r.output_to_dict(rouge_results)
    recall = [round(scores["rouge_1_recall"] * 100, 2),
              round(scores["rouge_2_recall"] * 100, 2),
              round(scores["rouge_l_recall"] * 100, 2)]
    precision = [round(scores["rouge_1_precision"] * 100, 2),
                 round(scores["rouge_2_precision"] * 100, 2),
                 round(scores["rouge_l_precision"] * 100, 2)]
    f_score = [round(scores["rouge_1_f_score"] * 100, 2),
               round(scores["rouge_2_f_score"] * 100, 2),
               round(scores["rouge_l_f_score"] * 100, 2)]
    print("| ROUGE F_measure: %s Recall: %s Precision: %s\n"
          % (str(f_score), str(recall), str(precision)))
    print(f_score)
    print(recall)
    print(precision)
    return f_score[:], recall[:], precision[:]
# cal_rouge()
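# Several snippets above repeat the same recall/precision/F-score rounding on
# the dict returned by output_to_dict(). A small assumed helper that factors
# it out; "variants" defaults to the common ROUGE-1/2/L triple:
def rounded_scores(scores, variants=("1", "2", "l")):
    recall = [round(scores["rouge_%s_recall" % v] * 100, 2) for v in variants]
    precision = [round(scores["rouge_%s_precision" % v] * 100, 2) for v in variants]
    f_score = [round(scores["rouge_%s_f_score" % v] * 100, 2) for v in variants]
    return f_score, recall, precision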